@braintrust/proxy
A proxy server that load balances across AI providers.
1,801 lines (1,795 loc) • 289 kB
JavaScript
"use strict";
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
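// The helpers above (__create, __defProp, __export, __copyProps, __toESM,
// __toCommonJS) are the usual bundler interop shims produced when TypeScript
// ESM sources are compiled into a single CommonJS file: __export installs lazy
// getters for the named exports, __toESM wraps require() results for ESM-style
// default imports, and __toCommonJS marks the export object with __esModule.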
// src/index.ts
var index_exports = {};
__export(index_exports, {
CACHED_HEADER: () => CACHED_HEADER,
CACHE_HEADER: () => CACHE_HEADER,
CACHE_TTL_HEADER: () => CACHE_TTL_HEADER,
CREDS_CACHE_HEADER: () => CREDS_CACHE_HEADER,
ENDPOINT_NAME_HEADER: () => ENDPOINT_NAME_HEADER,
FORMAT_HEADER: () => FORMAT_HEADER,
NOOP_METER_PROVIDER: () => import_NoopMeterProvider.NOOP_METER_PROVIDER,
ORG_NAME_HEADER: () => ORG_NAME_HEADER,
ProxyBadRequestError: () => ProxyBadRequestError,
USED_ENDPOINT_HEADER: () => USED_ENDPOINT_HEADER,
aggregateMetrics: () => aggregateMetrics,
createEventStreamTransformer: () => createEventStreamTransformer,
exponentialBuckets: () => exponentialBuckets,
flattenChunks: () => flattenChunks,
flattenChunksArray: () => flattenChunksArray,
flushMetrics: () => flushMetrics,
getRandomInt: () => getRandomInt,
getTimestampInSeconds: () => getTimestampInSeconds,
guessSpanType: () => guessSpanType,
initMetrics: () => initMetrics,
isEmpty: () => isEmpty,
isObject: () => isObject,
linearBuckets: () => linearBuckets,
nowMs: () => nowMs,
parseAuthHeader: () => parseAuthHeader,
parseFileMetadataFromUrl: () => parseFileMetadataFromUrl,
parseNumericHeader: () => parseNumericHeader,
prometheusSerialize: () => prometheusSerialize,
proxyV1: () => proxyV1,
writeToReadable: () => writeToReadable
});
module.exports = __toCommonJS(index_exports);
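// Usage sketch: since the bundle is CommonJS, consumers can pull any of the
// named exports registered above (a few of the utility helpers are shown; the
// package name is taken from the header of this listing):
//   const { proxyV1, parseAuthHeader, flattenChunks } = require("@braintrust/proxy");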
// src/util.ts
var import_content_disposition = __toESM(require("content-disposition"));
function parseAuthHeader(headers) {
const authHeader = headers["authorization"];
let authValue = null;
if (Array.isArray(authHeader)) {
authValue = authHeader[authHeader.length - 1];
} else {
authValue = authHeader;
}
if (authValue) {
const parts = authValue.split(" ");
if (parts.length !== 2) {
return null;
}
return parts[1];
}
const apiKeyHeader = headers["x-api-key"];
if (apiKeyHeader) {
return Array.isArray(apiKeyHeader) ? apiKeyHeader[apiKeyHeader.length - 1] : apiKeyHeader;
}
return null;
}
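// Usage sketch for parseAuthHeader: it takes a header map, prefers the last
// "authorization" value (which must be a two-part "<scheme> <token>" string),
// and falls back to "x-api-key"; the header values below are hypothetical.
//   parseAuthHeader({ authorization: "Bearer sk-123" });   // => "sk-123"
//   parseAuthHeader({ authorization: "sk-123" });          // => null (not "<scheme> <token>")
//   parseAuthHeader({ "x-api-key": ["k1", "k2"] });        // => "k2" (last value wins)
//   parseAuthHeader({});                                   // => null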
function parseNumericHeader(headers, headerKey) {
let value = headers[headerKey];
if (Array.isArray(value)) {
value = value[value.length - 1];
}
if (value !== void 0) {
try {
return parseInt(value, 10);
} catch (e) {
}
}
return null;
}
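// Usage sketch for parseNumericHeader (hypothetical header key): it parses the
// last value with parseInt, or returns null when the header is absent. Note
// that parseInt never throws, so a non-numeric value yields NaN rather than null.
//   parseNumericHeader({ "x-bt-cache-ttl": "3600" }, "x-bt-cache-ttl"); // => 3600
//   parseNumericHeader({}, "x-bt-cache-ttl");                           // => null
//   parseNumericHeader({ "x-bt-cache-ttl": "abc" }, "x-bt-cache-ttl");  // => NaN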
function isObject(value) {
return value instanceof Object && !(value instanceof Array);
}
function getTimestampInSeconds() {
return Math.floor(Date.now() / 1e3);
}
function flattenChunksArray(allChunks) {
const flatArray = new Uint8Array(allChunks.reduce((a, b) => a + b.length, 0));
for (let i = 0, offset = 0; i < allChunks.length; i++) {
flatArray.set(allChunks[i], offset);
offset += allChunks[i].length;
}
return flatArray;
}
function flattenChunks(allChunks) {
const flatArray = flattenChunksArray(allChunks);
return new TextDecoder().decode(flatArray);
}
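// Usage sketch: flattenChunksArray concatenates Uint8Array chunks into a single
// array, and flattenChunks additionally decodes the result as UTF-8 text.
//   const chunks = [new TextEncoder().encode("hel"), new TextEncoder().encode("lo")];
//   flattenChunksArray(chunks); // => Uint8Array(5) [104, 101, 108, 108, 111]
//   flattenChunks(chunks);      // => "hello"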
function isEmpty(a) {
return a === void 0 || a === null;
}
function getRandomInt(max) {
return Math.floor(Math.random() * max);
}
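// Quick examples for the small helpers above:
//   isObject({ a: 1 });   // => true
//   isObject([1, 2, 3]);  // => false (arrays are excluded)
//   isEmpty(void 0);      // => true; isEmpty(0) and isEmpty("") are false
//   getRandomInt(10);     // => an integer in [0, 9]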
var ProxyBadRequestError = class extends Error {
constructor(message) {
super(message);
this.message = message;
}
};
function parseFileMetadataFromUrl(url) {
var _a;
try {
if (!url || url.trim() === "") {
return void 0;
}
let parsedUrl;
try {
parsedUrl = new URL(url);
} catch (e) {
return void 0;
}
if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
return void 0;
} else if (!parsedUrl.pathname || parsedUrl.pathname === "/" || parsedUrl.pathname.endsWith("/")) {
return void 0;
}
let filename = parsedUrl.pathname.split("/").pop();
if (!filename) {
return void 0;
}
let contentType = void 0;
if (parsedUrl.searchParams.get("X-Amz-Expires") !== null) {
const disposition = import_content_disposition.default.parse(
parsedUrl.searchParams.get("response-content-disposition") || ""
);
filename = disposition.parameters.filename ? decodeURIComponent(disposition.parameters.filename) : filename;
contentType = (_a = parsedUrl.searchParams.get("response-content-type")) != null ? _a : void 0;
}
try {
filename = decodeURIComponent(filename);
} catch (e) {
}
return { filename, contentType, url: parsedUrl };
} catch (e) {
return void 0;
}
}
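// Usage sketch for parseFileMetadataFromUrl (hypothetical URLs): it returns
// { filename, contentType, url } for http(s) URLs whose path ends in a file-like
// segment, and undefined otherwise. For S3-style presigned URLs (detected via
// the X-Amz-Expires query parameter) it prefers the filename and content type
// from the response-content-disposition / response-content-type parameters.
//   parseFileMetadataFromUrl("https://example.com/docs/report%20v1.pdf");
//     // => { filename: "report v1.pdf", contentType: undefined, url: URL { ... } }
//   parseFileMetadataFromUrl("ftp://example.com/report.pdf"); // => undefined (not http/https)
//   parseFileMetadataFromUrl("https://example.com/docs/");    // => undefined (no filename)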
var writeToReadable = (response) => {
return new ReadableStream({
start(controller) {
controller.enqueue(new TextEncoder().encode(response));
controller.close();
}
});
};
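// Usage sketch: writeToReadable wraps a string in a single-chunk ReadableStream
// of UTF-8 bytes, useful where a streaming body is expected but the response is
// already fully known.
//   const stream = writeToReadable("data: done\n\n");
//   // hypothetical consumer: the stream yields one encoded chunk, then closes
//   new Response(stream);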
// src/proxy.ts
var import_json_schema_ref_parser = __toESM(require("@apidevtools/json-schema-ref-parser"));
var import_core3 = require("@braintrust/core");
var import_typespecs3 = require("@braintrust/core/typespecs");
// schema/index.ts
var import_zod7 = require("zod");
// schema/models.ts
var import_zod = require("zod");
// schema/model_list.json
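// The catalog below maps model names to metadata: "format" (which provider API
// shape the model speaks), "flavor" (chat vs. completion), capability flags
// (multimodal, reasoning), token limits, and prices expressed per million
// tokens. A minimal cost sketch, assuming the *_cost_per_mil_tokens fields are
// USD per 1M tokens and using made-up token counts:
//   const m = model_list_default["gpt-4o"];
//   const costUsd =
//     (100000 / 1e6) * m.input_cost_per_mil_tokens +  // 100k prompt tokens -> $0.25
//     (2000 / 1e6) * m.output_cost_per_mil_tokens;    // 2k completion tokens -> $0.02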
var model_list_default = {
"gpt-4o": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2.5,
output_cost_per_mil_tokens: 10,
input_cache_read_cost_per_mil_tokens: 1.25,
displayName: "GPT-4o",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-2024-11-20": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2.5,
output_cost_per_mil_tokens: 10,
input_cache_read_cost_per_mil_tokens: 1.25,
parent: "gpt-4o",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-2024-08-06": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2.5,
output_cost_per_mil_tokens: 10,
input_cache_read_cost_per_mil_tokens: 1.25,
parent: "gpt-4o",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-2024-05-13": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 5,
output_cost_per_mil_tokens: 15,
parent: "gpt-4o",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4.1": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
input_cache_read_cost_per_mil_tokens: 0.5,
displayName: "GPT-4.1",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"gpt-4.1-2025-04-14": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
input_cache_read_cost_per_mil_tokens: 0.5,
parent: "gpt-4.1",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"gpt-4o-mini": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.6,
input_cache_read_cost_per_mil_tokens: 0.075,
displayName: "GPT-4o mini",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-mini-2024-07-18": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.6,
input_cache_read_cost_per_mil_tokens: 0.075,
parent: "gpt-4o-mini",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4.1-mini": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.4,
output_cost_per_mil_tokens: 1.6,
input_cache_read_cost_per_mil_tokens: 0.1,
displayName: "GPT-4.1 mini",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"gpt-4.1-mini-2025-04-14": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.4,
output_cost_per_mil_tokens: 1.6,
input_cache_read_cost_per_mil_tokens: 0.1,
parent: "gpt-4.1-mini",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"gpt-4.1-nano": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.4,
input_cache_read_cost_per_mil_tokens: 0.025,
displayName: "GPT-4.1 nano",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"gpt-4.1-nano-2025-04-14": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.4,
input_cache_read_cost_per_mil_tokens: 0.025,
parent: "gpt-4.1-nano",
max_input_tokens: 1047576,
max_output_tokens: 32768
},
"o4-mini": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
input_cache_read_cost_per_mil_tokens: 0.275,
reasoning: true,
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o4-mini-2025-04-16": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
input_cache_read_cost_per_mil_tokens: 0.275,
reasoning: true,
parent: "o4-mini",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o3-mini": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
input_cache_read_cost_per_mil_tokens: 0.55,
reasoning: true,
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o3-mini-2025-01-31": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
input_cache_read_cost_per_mil_tokens: 0.55,
reasoning: true,
parent: "o3-mini",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o3-pro": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 20,
output_cost_per_mil_tokens: 40,
reasoning: true,
displayName: "o3 Pro",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o3-pro-2025-06-10": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 20,
output_cost_per_mil_tokens: 40,
reasoning: true,
parent: "o3-pro",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
o3: {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
input_cache_read_cost_per_mil_tokens: 0.5,
reasoning: true,
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o3-2025-04-16": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
input_cache_read_cost_per_mil_tokens: 0.5,
reasoning: true,
parent: "o3",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
o1: {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 60,
input_cache_read_cost_per_mil_tokens: 7.5,
reasoning: true,
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o1-2024-12-17": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 60,
input_cache_read_cost_per_mil_tokens: 7.5,
reasoning: true,
parent: "o1",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o1-mini": {
format: "openai",
flavor: "chat",
multimodal: false,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
input_cache_read_cost_per_mil_tokens: 0.55,
reasoning: true,
max_input_tokens: 128e3,
max_output_tokens: 65536
},
"o1-mini-2024-09-12": {
format: "openai",
flavor: "chat",
multimodal: false,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 12,
input_cache_read_cost_per_mil_tokens: 1.5,
reasoning: true,
parent: "o1-mini",
max_input_tokens: 128e3,
max_output_tokens: 65536
},
"o1-pro": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 150,
output_cost_per_mil_tokens: 600,
reasoning: true,
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"o1-pro-2025-03-19": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 150,
output_cost_per_mil_tokens: 600,
reasoning: true,
parent: "o1-pro",
max_input_tokens: 2e5,
max_output_tokens: 1e5
},
"chatgpt-4o-latest": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 5,
output_cost_per_mil_tokens: 15,
displayName: "ChatGPT-4o",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-turbo": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
displayName: "GPT-4 Turbo",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-turbo-2024-04-09": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
parent: "gpt-4-turbo",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-turbo-preview": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
parent: "gpt-4-turbo",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 30,
output_cost_per_mil_tokens: 60,
displayName: "GPT-4",
max_input_tokens: 8192,
max_output_tokens: 4096
},
"gpt-4-0125-preview": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
experimental: true,
parent: "gpt-4",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-1106-preview": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
experimental: true,
parent: "gpt-4",
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-0613": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 30,
output_cost_per_mil_tokens: 60,
parent: "gpt-4",
max_input_tokens: 8192,
max_output_tokens: 4096
},
"gpt-4-0314": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 30,
output_cost_per_mil_tokens: 60,
parent: "gpt-4",
max_input_tokens: 8192,
max_output_tokens: 4096
},
"gpt-4.5-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 75,
output_cost_per_mil_tokens: 150,
input_cache_read_cost_per_mil_tokens: 37.5,
displayName: "GPT-4.5",
experimental: true,
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4.5-preview-2025-02-27": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 75,
output_cost_per_mil_tokens: 150,
input_cache_read_cost_per_mil_tokens: 37.5,
experimental: true,
parent: "gpt-4.5-preview",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"o1-preview": {
format: "openai",
flavor: "chat",
multimodal: false,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 60,
input_cache_read_cost_per_mil_tokens: 7.5,
reasoning: true,
experimental: true,
parent: "o1",
max_input_tokens: 128e3,
max_output_tokens: 32768
},
"o1-preview-2024-09-12": {
format: "openai",
flavor: "chat",
multimodal: false,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 60,
input_cache_read_cost_per_mil_tokens: 7.5,
reasoning: true,
experimental: true,
parent: "o1",
max_input_tokens: 128e3,
max_output_tokens: 32768
},
"gpt-4o-search-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2.5,
output_cost_per_mil_tokens: 10,
input_cache_read_cost_per_mil_tokens: 1.25,
displayName: "GPT-4o Search Preview",
experimental: true,
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-search-preview-2025-03-11": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2.5,
output_cost_per_mil_tokens: 10,
input_cache_read_cost_per_mil_tokens: 1.25,
experimental: true,
parent: "gpt-4o-search-preview",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-mini-search-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.6,
input_cache_read_cost_per_mil_tokens: 0.075,
displayName: "GPT-4o mini Search Preview",
experimental: true,
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-4o-mini-search-preview-2025-03-11": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.6,
input_cache_read_cost_per_mil_tokens: 0.075,
experimental: true,
parent: "gpt-4o-mini-search-preview",
max_input_tokens: 128e3,
max_output_tokens: 16384
},
"gpt-3.5-turbo-0125": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.5,
output_cost_per_mil_tokens: 1.5,
displayName: "GPT 3.5T 0125",
deprecated: true,
max_input_tokens: 16385,
max_output_tokens: 4096
},
"gpt-3.5-turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.5,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T",
deprecated: true,
max_input_tokens: 16385,
max_output_tokens: 4096
},
"gpt-35-turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.5,
output_cost_per_mil_tokens: 1.5,
displayName: "GPT 3.5T",
deprecated: true
},
"gpt-3.5-turbo-1106": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T 1106",
deprecated: true,
max_input_tokens: 16385,
max_output_tokens: 4096
},
"gpt-3.5-turbo-instruct": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 1.5,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T Instruct",
deprecated: true,
max_input_tokens: 8192,
max_output_tokens: 4096
},
"gpt-3.5-turbo-instruct-0914": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 1.5,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T Instruct 0914",
deprecated: true,
max_input_tokens: 8192,
max_output_tokens: 4097
},
"gpt-4-32k": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 60,
output_cost_per_mil_tokens: 120,
displayName: "GPT 4 32k",
deprecated: true,
max_input_tokens: 32768,
max_output_tokens: 4096
},
"gpt-4-32k-0613": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 60,
output_cost_per_mil_tokens: 120,
displayName: "GPT 4 32k 0613",
deprecated: true,
max_input_tokens: 32768,
max_output_tokens: 4096
},
"gpt-4-32k-0314": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 60,
output_cost_per_mil_tokens: 120,
displayName: "GPT 4 32k 0314",
deprecated: true,
max_input_tokens: 32768,
max_output_tokens: 4096
},
"gpt-4-vision-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
displayName: "GPT 4 Vision-Preview",
deprecated: true,
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-4-1106-vision-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 10,
output_cost_per_mil_tokens: 30,
displayName: "GPT 4 1106 Vision-Preview",
deprecated: true,
max_input_tokens: 128e3,
max_output_tokens: 4096
},
"gpt-3.5-turbo-16k": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 4,
displayName: "GPT 3.5T 16k",
deprecated: true,
max_input_tokens: 16385,
max_output_tokens: 4096
},
"gpt-35-turbo-16k": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 4,
displayName: "GPT 3.5T 16k",
deprecated: true
},
"gpt-3.5-turbo-16k-0613": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 4,
displayName: "GPT 3.5T 16k 0613",
deprecated: true,
max_input_tokens: 16385,
max_output_tokens: 4096
},
"gpt-3.5-turbo-0613": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.5,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T 0613",
deprecated: true,
max_input_tokens: 4097,
max_output_tokens: 4096
},
"gpt-3.5-turbo-0301": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.5,
output_cost_per_mil_tokens: 2,
displayName: "GPT 3.5T 0301",
deprecated: true,
max_input_tokens: 4097,
max_output_tokens: 4096
},
"text-davinci-003": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 2,
displayName: "Text Davinci 003",
deprecated: true
},
"claude-sonnet-4-20250514": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
displayName: "Claude 4 Sonnet",
reasoning: true,
reasoning_budget: true,
max_input_tokens: 2e5,
max_output_tokens: 64e3
},
"claude-4-sonnet-20250514": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
displayName: "Claude 4 Sonnet (old naming format)",
deprecated: true,
max_input_tokens: 2e5,
max_output_tokens: 64e3
},
"claude-3-7-sonnet-latest": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
displayName: "Claude 3.7 Sonnet",
reasoning: true,
reasoning_budget: true,
max_input_tokens: 2e5,
max_output_tokens: 128e3
},
"claude-3-7-sonnet-20250219": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
reasoning: true,
reasoning_budget: true,
parent: "claude-3-7-sonnet-latest",
max_input_tokens: 2e5,
max_output_tokens: 128e3
},
"claude-3-5-haiku-latest": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1,
output_cost_per_mil_tokens: 5,
input_cache_read_cost_per_mil_tokens: 0.1,
input_cache_write_cost_per_mil_tokens: 1.25,
displayName: "Claude 3.5 Haiku",
max_input_tokens: 2e5,
max_output_tokens: 8192
},
"claude-3-5-haiku-20241022": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 4,
input_cache_read_cost_per_mil_tokens: 0.08,
input_cache_write_cost_per_mil_tokens: 1,
parent: "claude-3-5-haiku-latest",
max_input_tokens: 2e5,
max_output_tokens: 8192
},
"claude-3-5-sonnet-latest": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
displayName: "Claude 3.5 Sonnet",
max_input_tokens: 2e5,
max_output_tokens: 8192
},
"claude-3-5-sonnet-20241022": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
parent: "claude-3-5-sonnet-latest",
max_input_tokens: 2e5,
max_output_tokens: 8192
},
"claude-3-5-sonnet-20240620": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
input_cache_read_cost_per_mil_tokens: 0.3,
input_cache_write_cost_per_mil_tokens: 3.75,
parent: "claude-3-5-sonnet-latest",
max_input_tokens: 2e5,
max_output_tokens: 8192
},
"claude-opus-4-20250514": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 75,
input_cache_read_cost_per_mil_tokens: 1.5,
input_cache_write_cost_per_mil_tokens: 18.75,
displayName: "Claude 4 Opus",
reasoning: true,
reasoning_budget: true,
max_input_tokens: 2e5,
max_output_tokens: 32e3
},
"claude-4-opus-20250514": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 75,
input_cache_read_cost_per_mil_tokens: 1.5,
input_cache_write_cost_per_mil_tokens: 18.75,
displayName: "Claude 4 Opus (old naming format)",
deprecated: true,
max_input_tokens: 2e5,
max_output_tokens: 32e3
},
"claude-3-opus-latest": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 75,
input_cache_read_cost_per_mil_tokens: 1.5,
input_cache_write_cost_per_mil_tokens: 18.75,
displayName: "Claude 3 Opus",
max_input_tokens: 2e5,
max_output_tokens: 4096
},
"claude-3-opus-20240229": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15,
output_cost_per_mil_tokens: 75,
input_cache_read_cost_per_mil_tokens: 1.5,
input_cache_write_cost_per_mil_tokens: 18.75,
parent: "claude-3-opus-latest",
max_input_tokens: 2e5,
max_output_tokens: 4096
},
"claude-3-sonnet-20240229": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
displayName: "Claude 3 Sonnet",
max_input_tokens: 2e5,
max_output_tokens: 4096
},
"claude-3-haiku-20240307": {
format: "anthropic",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.25,
output_cost_per_mil_tokens: 1.25,
input_cache_read_cost_per_mil_tokens: 0.03,
input_cache_write_cost_per_mil_tokens: 0.3,
displayName: "Claude 3 Haiku",
max_input_tokens: 2e5,
max_output_tokens: 4096
},
"claude-instant-1.2": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 0.163,
output_cost_per_mil_tokens: 0.551,
displayName: "Claude Instant 1.2",
deprecated: true,
max_input_tokens: 1e5,
max_output_tokens: 8191
},
"claude-instant-1": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 1.63,
output_cost_per_mil_tokens: 5.51,
displayName: "Claude Instant 1",
deprecated: true,
max_input_tokens: 1e5,
max_output_tokens: 8191
},
"claude-2.1": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.1",
deprecated: true,
max_input_tokens: 2e5,
max_output_tokens: 8191
},
"claude-2.0": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.0",
deprecated: true
},
"claude-2": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2",
deprecated: true,
max_input_tokens: 1e5,
max_output_tokens: 8191
},
"meta/llama-2-70b-chat": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.65,
output_cost_per_mil_tokens: 2.75,
displayName: "LLaMA 2 70b Chat"
},
mistral: {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0,
output_cost_per_mil_tokens: 0
},
phi: {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0,
output_cost_per_mil_tokens: 0,
deprecated: true
},
sonar: {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1,
output_cost_per_mil_tokens: 1,
displayName: "Sonar"
},
"sonar-pro": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
displayName: "Sonar Pro"
},
"sonar-reasoning": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1,
output_cost_per_mil_tokens: 5,
displayName: "Sonar Reasoning"
},
"sonar-reasoning-pro": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
displayName: "Sonar Reasoning Pro"
},
"r1-1776": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 8,
displayName: "R1 1776"
},
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
format: "openai",
flavor: "chat",
multimodal: true,
displayName: "Llama 4 Maverick Instruct (17Bx128E)"
},
"meta-llama/Llama-4-Scout-17B-16E-Instruct": {
format: "openai",
flavor: "chat",
displayName: "Llama 4 Scout Instruct (17Bx16E)"
},
"meta-llama/Llama-3.3-70B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.88,
output_cost_per_mil_tokens: 0.88,
displayName: "Llama 3.3 70B Instruct Turbo"
},
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0,
output_cost_per_mil_tokens: 0,
displayName: "Llama 3.3 70B Instruct Turbo Free"
},
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "Llama 3.2 90B Vision Instruct Turbo"
},
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.18,
output_cost_per_mil_tokens: 0.18,
displayName: "Llama 3.2 11B Vision Instruct Turbo"
},
"meta-llama/Llama-Vision-Free": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0,
output_cost_per_mil_tokens: 0,
displayName: "Llama Vision Free"
},
"meta-llama/Llama-3.2-3B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.06,
output_cost_per_mil_tokens: 0.06,
displayName: "Llama 3.2 3B Instruct Turbo"
},
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 3.5,
output_cost_per_mil_tokens: 3.5,
displayName: "Llama 3.1 405B Instruct Turbo"
},
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.88,
output_cost_per_mil_tokens: 0.88,
displayName: "Llama 3.1 70B Instruct Turbo"
},
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.18,
output_cost_per_mil_tokens: 0.18,
displayName: "Llama 3.1 8B Instruct Turbo"
},
"meta-llama/Llama-3-70b-chat-hf": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "Llama 3 70B Instruct Reference"
},
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.88,
output_cost_per_mil_tokens: 0.88,
displayName: "Llama 3 70B Instruct Turbo"
},
"meta-llama/Meta-Llama-3-70B-Instruct-Lite": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.54,
output_cost_per_mil_tokens: 0.54,
displayName: "Llama 3 70B Instruct Lite"
},
"meta-llama/Llama-3-8b-chat-hf": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Llama 3 8B Instruct Reference"
},
"meta-llama/Meta-Llama-3-8B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.18,
output_cost_per_mil_tokens: 0.18,
displayName: "Llama 3 8B Instruct Turbo"
},
"meta-llama/Meta-Llama-3-8B-Instruct-Lite": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "Llama 3 8B Instruct Lite"
},
"google/gemma-2-27b-it": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.35,
output_cost_per_mil_tokens: 1.05,
displayName: "Gemma-2 Instruct (27B)",
max_output_tokens: 8192
},
"google/gemma-2-9b-it": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.35,
output_cost_per_mil_tokens: 1.05,
displayName: "Gemma-2 Instruct (9B)",
max_output_tokens: 8192
},
"google/gemma-2b-it": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "Gemma Instruct (2B)"
},
"mistralai/Mistral-Small-24B-Instruct-2501": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Mistral Small (24B) Instruct 25.01"
},
"mistralai/Mistral-7B-Instruct-v0.3": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Mistral (7B) Instruct v0.3"
},
"mistralai/Mistral-7B-Instruct-v0.2": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Mistral (7B) Instruct v0.2"
},
"mistralai/Mistral-7B-Instruct-v0.1": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Mistral (7B) Instruct"
},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "Mixtral 8x22B Instruct v0.1"
},
"mistralai/Mixtral-8x7B-Instruct-v0.1": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.6,
output_cost_per_mil_tokens: 0.6,
displayName: "Mixtral 8x7B Instruct v0.1"
},
"deepseek-ai/DeepSeek-V3": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.25,
output_cost_per_mil_tokens: 1.25,
displayName: "DeepSeek V3"
},
"deepseek-ai/DeepSeek-R1": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 7,
output_cost_per_mil_tokens: 7,
displayName: "DeepSeek R1"
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 2,
displayName: "DeepSeek R1 Distill Llama 70B"
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-Free": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0,
output_cost_per_mil_tokens: 0,
displayName: "DeepSeek R1 Distill Llama 70B Free"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.6,
output_cost_per_mil_tokens: 1.6,
displayName: "DeepSeek R1 Distill Qwen 14B"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.18,
output_cost_per_mil_tokens: 0.18,
displayName: "DeepSeek R1 Distill Qwen 1.5B"
},
"deepseek-ai/deepseek-llm-67b-chat": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "DeepSeek LLM Chat (67B)"
},
"Qwen/Qwen2.5-72B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "Qwen 2.5 72B Instruct Turbo"
},
"Qwen/Qwen2.5-7B-Instruct-Turbo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.3,
output_cost_per_mil_tokens: 0.3,
displayName: "Qwen 2.5 7B Instruct Turbo"
},
"Qwen/Qwen2.5-Coder-32B-Instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Qwen 2.5 Coder 32B Instruct"
},
"Qwen/QwQ-32B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Qwen QwQ 32B"
},
"Qwen/Qwen2-VL-72B-Instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "Qwen-2VL (72B) Instruct"
},
"Qwen/Qwen2-72B-Instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "Qwen 2 Instruct (72B)"
},
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.88,
output_cost_per_mil_tokens: 0.88,
displayName: "Llama 3.1 Nemotron 70B Instruct HF"
},
"microsoft/WizardLM-2-8x22B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "WizardLM-2 (8x22B)"
},
"databricks/dbrx-instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1.2,
output_cost_per_mil_tokens: 1.2,
displayName: "DBRX Instruct"
},
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.6,
output_cost_per_mil_tokens: 0.6,
displayName: "Nous Hermes 2 - Mixtral 8x7B-DPO"
},
"Gryphe/MythoMax-L2-13b": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.3,
output_cost_per_mil_tokens: 0.3,
displayName: "MythoMax-L2 (13B)"
},
"Gryphe/MythoMax-L2-13b-Lite": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "Gryphe MythoMax L2 Lite (13B)"
},
"meta-llama/Meta-Llama-3-70B": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "Llama 3 70b",
deprecated: true
},
"meta-llama/Llama-3-8b-hf": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Llama 3 8b HF",
deprecated: true
},
"meta-llama/Llama-2-70b-chat-hf": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "Llama 2 70b Chat HF",
deprecated: true
},
"deepseek-ai/deepseek-coder-33b-instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Deepseek Coder 33b Instruct",
deprecated: true
},
"Qwen/QwQ-32B-Preview": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Qwen QwQ 32B Preview",
deprecated: true
},
"NousResearch/Nous-Hermes-2-Yi-34B": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 0.8,
displayName: "Nous Hermes 2 Yi 34B",
deprecated: true
},
"magistral-medium-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 5,
displayName: "Magistral Medium Latest",
max_input_tokens: 40960,
max_output_tokens: 4e4
},
"magistral-medium-2506": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 5,
parent: "magistral-medium-latest",
max_input_tokens: 40960,
max_output_tokens: 4e4
},
"magistral-small-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.5,
output_cost_per_mil_tokens: 1.5,
displayName: "Magistral Small Latest",
max_input_tokens: 4e4,
max_output_tokens: 4e4
},
"magistral-small-2506": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.5,
output_cost_per_mil_tokens: 1.5,
parent: "magistral-small-latest",
max_input_tokens: 4e4,
max_output_tokens: 4e4
},
"mistralai/mixtral-8x7b-32kseqlen": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 0.06,
output_cost_per_mil_tokens: 0.06,
displayName: "Mixtral 8x7B 32k",
deprecated: true
},
"mistralai/Mixtral-8x7B-Instruct-v0.1-json": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.6,
output_cost_per_mil_tokens: 0.6,
displayName: "Mixtral 8x7B Instruct v0.1 JSON",
deprecated: true
},
"mistralai/Mixtral-8x22B": {
format: "openai",
flavor: "completion",
input_cost_per_mil_tokens: 1.08,
output_cost_per_mil_tokens: 1.08,
displayName: "Mixtral 8x22B",
deprecated: true
},
"mistral-large-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 6,
displayName: "Mistral Large"
},
"mistral-large-2411": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 6,
parent: "mistral-large-latest"
},
"pixtral-large-latest": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 6,
displayName: "Pixtral Large"
},
"pixtral-large-2411": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 6,
parent: "pixtral-large-latest"
},
"mistral-medium-latest": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.4,
output_cost_per_mil_tokens: 2,
displayName: "Mistral Medium 3"
},
"mistral-medium-2505": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.4,
output_cost_per_mil_tokens: 2,
parent: "mistral-medium-latest"
},
"mistral-small-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.3,
displayName: "Mistral Small"
},
"mistral-small-2501": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.3,
parent: "mistral-small-latest"
},
"codestral-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.3,
output_cost_per_mil_tokens: 0.9,
displayName: "Codestral"
},
"codestral-2501": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.3,
output_cost_per_mil_tokens: 0.9,
parent: "codestral-latest"
},
"ministral-8b-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "Ministral 8B"
},
"ministral-8b-2410": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
parent: "ministral-8b-latest"
},
"ministral-3b-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.04,
output_cost_per_mil_tokens: 0.04,
displayName: "Ministral 3B"
},
"ministral-3b-2410": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.04,
output_cost_per_mil_tokens: 0.04,
parent: "ministral-3b-latest"
},
"mistral-saba-latest": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.6,
displayName: "Mistral Saba"
},
"mistral-saba-2502": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.6,
parent: "mistral-saba-latest"
},
"pixtral-12b-2409": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.15,
displayName: "Pixtral 12B"
},
"open-mistral-nemo": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.15,
displayName: "Mistral NeMo"
},
"open-mistral-nemo-2407": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.15
},
"open-codestral-mamba": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.6,
output_cost_per_mil_tokens: 0.6,
displayName: "Codestral Mamba"
},
"open-mixtral-8x22b": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 6,
displayName: "Mixtral 8x22B",
deprecated: true
},
"mistral-tiny": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.46,
displayName: "Mistral Tiny",
deprecated: true
},
"mistral-small": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 1,
output_cost_per_mil_tokens: 3,
displayName: "Mistral Small",
deprecated: true
},
"mistral-medium": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 2.75,
output_cost_per_mil_tokens: 8.1,
displayName: "Mistral Medium",
deprecated: true
},
"llama-3.3-70b-versatile": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.59,
output_cost_per_mil_tokens: 0.79,
displayName: "Llama 3.3 70B Versatile 128k"
},
"llama-3.1-8b-instant": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.05,
output_cost_per_mil_tokens: 0.08,
displayName: "Llama 3.1 8B Instant 128k"
},
"llama3-70b-8192": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.64,
output_cost_per_mil_tokens: 0.8,
displayName: "Llama 3 70B 8k"
},
"llama3-8b-8192": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "Llama 3 8B 8k"
},
"llama-guard-3-8b": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Llama Guard 3 8B 8k"
},
"gemma2-9b-it": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.2,
output_cost_per_mil_tokens: 0.2,
displayName: "Gemma 2 9B"
},
"meta-llama/llama-4-maverick-17b-128e-instruct": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.5,
output_cost_per_mil_tokens: 0.77,
displayName: "Llama 4 Maverick (17Bx128E)",
experimental: true
},
"meta-llama/llama-4-scout-17b-16e-instruct": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.11,
output_cost_per_mil_tokens: 0.34,
displayName: "Llama 4 Scout (17Bx16E)",
experimental: true
},
"llama-3.3-70b-specdec": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.59,
output_cost_per_mil_tokens: 0.99,
displayName: "Llama 3.3 70B SpecDec 8k",
experimental: true
},
"llama-3.2-90b-vision-preview": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 0.9,
output_cost_per_mil_tokens: 0.9,
displayName: "Llama 3.2 90B Vision 8k (Preview)",
experimental: true
},
"llama-3.2-11b-vision-preview": {
format: "openai",
flavor: "cha