// llm-info
// Information on LLM models: context window token limit, output token limit, pricing and more.
import { ModelLike, ModelEnum, NonModelEnum } from './model';
import { ModelInfo, ModelInfoCurrent, ModelInfoLegacy } from './modelInfo';
import { AI_PROVIDERS } from './provider';
/**
 * Builds a complete ModelInfo by attaching the model's ID to an
 * ID-less info record (as stored in ModelInfoMap).
 *
 * @param id   The model identifier (map key) to embed in the result.
 * @param info The model's metadata without its `id` field.
 * @returns The same metadata with `id` filled in.
 */
function createModelInfo(
  id: string,
  info: Omit<ModelInfoCurrent, 'id'> | Omit<ModelInfoLegacy, 'id'>
): ModelInfo {
  const withId = Object.assign({}, info, { id }) as ModelInfo;
  return withId;
}
/**
 * Static catalog of model metadata, keyed by model ID.
 *
 * Values intentionally omit `id` (the key IS the id); use
 * getModelInfoWithId / getAllModelsWithIds to obtain full ModelInfo
 * records with the `id` field attached.
 *
 * Prices are USD per million tokens; `null` pricing/tokenizer means
 * "not applicable" (e.g. the ChatGPT product entry).
 */
export const ModelInfoMap: Record<
  ModelLike,
  Omit<ModelInfoCurrent, 'id'> | Omit<ModelInfoLegacy, 'id'>
> = {
  // --- OpenAI ---
  [ModelEnum['gpt-4']]: {
    name: 'GPT-4',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 30,
    pricePerMillionOutputTokens: 60,
    tokenizerId: 'Xenova/gpt-4',
    legacy: true,
    legacyReason: 'Superseded by GPT-4o',
  },
  [ModelEnum['gpt-4-turbo']]: {
    name: 'GPT-4 Turbo',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 10,
    pricePerMillionOutputTokens: 30,
    tokenizerId: 'Xenova/gpt-4',
    legacy: true,
    legacyReason: 'Superseded by GPT-4o',
  },
  [ModelEnum['gpt-4o']]: {
    name: 'GPT-4o',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by GPT-5',
  },
  [ModelEnum['gpt-4o-mini']]: {
    name: 'GPT-4o mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: 0.15,
    pricePerMillionOutputTokens: 0.6,
    tokenizerId: 'Xenova/gpt-4o',
    small: true,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by GPT-5 mini',
  },
  [ModelEnum['gpt-4o-64k-output-alpha']]: {
    name: 'GPT-4o Long Output',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 64000,
    pricePerMillionInputTokens: 6,
    pricePerMillionOutputTokens: 18,
    tokenizerId: 'Xenova/gpt-4o',
    alpha: true,
    legacy: true,
    notes:
      'OpenAI is offering an experimental version of GPT-4o with a maximum of 64K output tokens per request.',
    notesUrl: 'https://openai.com/gpt-4o-long-output/',
    supportsImageInput: true,
    legacyReason: 'Superseded by GPT-4o',
  },
  [ModelEnum['gpt-4o-2024-08-06']]: {
    name: 'GPT-4o 08-06',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 16384,
    pricePerMillionInputTokens: 2.5,
    pricePerMillionOutputTokens: 10,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      'This model is a version of GPT-4o that was released on August 6, 2024. It has a maximum of 16K output tokens per request.',
    legacy: true,
    supportsImageInput: true,
    legacyReason: 'Superseded by GPT-4o',
  },
  [ModelEnum['gpt-4.1']]: {
    name: 'GPT-4.1',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 1047576,
    outputTokenLimit: 32768,
    pricePerMillionInputTokens: 2,
    pricePerMillionOutputTokens: 8,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      'GPT-4.1 is a flagship model for complex tasks. It is well suited for problem solving across domains. Knowledge cutoff: Jun 01, 2024.',
    recommendedForCoding: true,
    recommendedForWriting: true,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by GPT-5',
    openRouterModelId: 'openai/gpt-4.1',
  },
  [ModelEnum['gpt-4.1-mini']]: {
    name: 'GPT-4.1 mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 1047576,
    outputTokenLimit: 32768,
    pricePerMillionInputTokens: 0.4,
    pricePerMillionOutputTokens: 1.6,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      'GPT-4.1 mini provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases.',
    small: true,
    legacy: true,
    legacyReason: 'Superseded by GPT-5 mini',
  },
  [ModelEnum['gpt-4.1-nano']]: {
    name: 'GPT-4.1 nano',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 1047576,
    outputTokenLimit: 32768,
    pricePerMillionInputTokens: 0.1,
    pricePerMillionOutputTokens: 0.4,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'GPT-4.1 nano is the fastest, most cost-effective GPT-4.1 model.',
    small: true,
    legacy: true,
    legacyReason: 'Superseded by GPT-5 nano',
  },
  [ModelEnum['gpt-5']]: {
    name: 'GPT-5',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 400000,
    outputTokenLimit: 128000,
    pricePerMillionInputTokens: 1.25,
    pricePerMillionOutputTokens: 10,
    tokenizerId: 'Xenova/gpt-4o',
    supportsImageInput: true,
    recommendedForCoding: true,
    recommendedForWriting: true,
    reasoning: true,
    legacy: false,
    openRouterModelId: 'openai/gpt-5',
  },
  [ModelEnum['gpt-5-mini']]: {
    name: 'GPT-5 mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 400000,
    outputTokenLimit: 128000,
    pricePerMillionInputTokens: 0.25,
    pricePerMillionOutputTokens: 2,
    tokenizerId: 'Xenova/gpt-4o',
    supportsImageInput: true,
    reasoning: true,
    small: true,
    legacy: false,
    openRouterModelId: 'openai/gpt-5-mini',
  },
  [ModelEnum['gpt-5-nano']]: {
    name: 'GPT-5 nano',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 400000,
    outputTokenLimit: 128000,
    pricePerMillionInputTokens: 0.05,
    pricePerMillionOutputTokens: 0.4,
    tokenizerId: 'Xenova/gpt-4o',
    supportsImageInput: true,
    reasoning: true,
    small: true,
    legacy: false,
    // Fixed: previously pointed at 'openai/gpt-5-mini' (copy-paste error).
    openRouterModelId: 'openai/gpt-5-nano',
  },
  [ModelEnum['o1-preview']]: {
    name: 'o1-preview',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 32768,
    pricePerMillionInputTokens: 15,
    pricePerMillionOutputTokens: 60,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      'An early preview of our o1 model, designed to reason about hard problems using broad general knowledge about the world.',
    alpha: true,
    legacy: true,
    legacyReason: 'Superseded by o1',
  },
  [ModelEnum['o1-mini']]: {
    name: 'o1-mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 128000,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 1.1,
    pricePerMillionOutputTokens: 4.4,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      "A faster and cheaper version of o1, particularly adept at coding, math, and science tasks where extensive general knowledge isn't required.",
    small: true,
    reasoning: true,
    legacy: true,
    legacyReason: 'Superseded by o3-mini',
  },
  [ModelEnum['o1']]: {
    name: 'o1',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 100000,
    pricePerMillionInputTokens: 15,
    pricePerMillionOutputTokens: 60,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'Reasoning model designed to solve hard problems across domains.',
    legacy: true,
    reasoning: true,
    supportsImageInput: true,
    legacyReason: 'Superseded by o3',
  },
  [ModelEnum['o3']]: {
    name: 'o3',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 100000,
    pricePerMillionInputTokens: 10,
    pricePerMillionOutputTokens: 40,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'o3 is a well-rounded and powerful model across domains.',
    reasoning: true,
    supportsImageInput: true,
    legacy: false,
  },
  [ModelEnum['o3-mini']]: {
    name: 'o3-mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 100000,
    pricePerMillionInputTokens: 1.1,
    pricePerMillionOutputTokens: 4.4,
    tokenizerId: 'Xenova/gpt-4o',
    reasoning: true,
    small: true,
    legacy: true,
    legacyReason: 'Superseded by o4-mini',
  },
  [ModelEnum['o4-mini']]: {
    name: 'o4-mini',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 100000,
    pricePerMillionInputTokens: 1.1,
    pricePerMillionOutputTokens: 4.4,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      "o4-mini is OpenAI's latest small o-series model. It's optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks.",
    reasoning: true,
    small: true,
    legacy: false,
    openRouterModelId: 'openai/o4-mini',
  },
  // --- Anthropic ---
  [ModelEnum['claude-3-5-sonnet-20240620']]: {
    name: 'Claude 3.5 Sonnet (Old)',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 8192,
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/claude-tokenizer',
    notes: '8192 output tokens is in beta.',
    notesUrl: 'https://docs.anthropic.com/en/docs/about-claude/models',
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by new Claude 3.5 Sonnet',
  },
  [ModelEnum['claude-3-5-sonnet-20241022']]: {
    name: 'Claude 3.5 Sonnet',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 8192,
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/claude-tokenizer',
    notes: 'New version of Claude 3.5 Sonnet released on October 22, 2024.',
    notesUrl: 'https://www.anthropic.com/news/3-5-models-and-computer-use',
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Claude 3.7 Sonnet and Claude Sonnet 4',
  },
  [ModelEnum['claude-3-5-haiku-20241022']]: {
    name: 'Claude 3.5 Haiku',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 8192,
    pricePerMillionInputTokens: 1,
    pricePerMillionOutputTokens: 5,
    tokenizerId: 'Xenova/claude-tokenizer',
    supportsImageInput: true,
    legacy: false,
  },
  [ModelEnum['claude-3-7-sonnet-20250219']]: {
    name: 'Claude 3.7 Sonnet',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 64000,
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/claude-tokenizer',
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Claude Sonnet 4',
  },
  [ModelEnum['claude-opus-4-20250514']]: {
    name: 'Claude Opus 4',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 32000,
    pricePerMillionInputTokens: 15,
    pricePerMillionOutputTokens: 75,
    tokenizerId: 'Xenova/claude-tokenizer',
    notes: 'Most intelligent model for complex tasks',
    notesUrl: 'https://www.anthropic.com/claude/opus',
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Claude Opus 4.1',
    openRouterModelId: 'anthropic/claude-opus-4',
  },
  [ModelEnum['claude-opus-4-1-20250805']]: {
    name: 'Claude Opus 4.1',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 32000,
    pricePerMillionInputTokens: 15,
    pricePerMillionOutputTokens: 75,
    tokenizerId: 'Xenova/claude-tokenizer',
    notes: 'Most intelligent model for complex tasks',
    notesUrl: 'https://www.anthropic.com/claude/opus',
    supportsImageInput: true,
    legacy: false,
    openRouterModelId: 'anthropic/claude-opus-4.1',
  },
  [ModelEnum['claude-sonnet-4-20250514']]: {
    name: 'Claude Sonnet 4',
    provider: AI_PROVIDERS.ANTHROPIC,
    contextWindowTokenLimit: 200000,
    outputTokenLimit: 64000,
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/claude-tokenizer',
    notes: 'Optimal balance of intelligence, cost, and speed',
    notesUrl: 'https://www.anthropic.com/claude/sonnet',
    supportsImageInput: true,
    recommendedForCoding: true,
    recommendedForWriting: true,
    legacy: false,
    openRouterModelId: 'anthropic/claude-sonnet-4',
  },
  // --- Products (not raw API models) ---
  [NonModelEnum['chatgpt']]: {
    name: 'ChatGPT',
    provider: AI_PROVIDERS.OPENAI,
    contextWindowTokenLimit: 4096,
    outputTokenLimit: 4096,
    pricePerMillionInputTokens: null,
    pricePerMillionOutputTokens: null,
    tokenizerId: null,
    legacy: false,
  },
  // --- DeepSeek ---
  [ModelEnum['deepseek-chat']]: {
    name: 'DeepSeek-V3 (New)',
    provider: AI_PROVIDERS.DEEPSEEK,
    contextWindowTokenLimit: 64000,
    outputTokenLimit: 8000,
    pricePerMillionInputTokens: 0.27,
    pricePerMillionOutputTokens: 1.1,
    tokenizerId: 'Xenova/gpt-4o',
    recommendedForCoding: true,
    supportsImageInput: false,
    legacy: false,
    openRouterModelId: 'deepseek/deepseek-chat-v3-0324',
  },
  [ModelEnum['deepseek-reasoner']]: {
    name: 'DeepSeek-R1',
    provider: AI_PROVIDERS.DEEPSEEK,
    contextWindowTokenLimit: 64000,
    outputTokenLimit: 8000,
    pricePerMillionInputTokens: 0.55,
    pricePerMillionOutputTokens: 2.19,
    tokenizerId: 'Xenova/gpt-4o',
    reasoning: true,
    supportsImageInput: false,
    legacy: false,
  },
  // --- Google ---
  [ModelEnum['gemini-2.5-pro-exp-03-25']]: {
    name: 'Gemini 2.5 Pro Experimental',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 0,
    pricePerMillionOutputTokens: 0,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'This is an experimental model that is currently free to test.',
    reasoning: true,
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Pro Preview',
  },
  [ModelEnum['gemini-2.5-pro-preview-03-25']]: {
    name: 'Gemini 2.5 Pro Preview (03-25)',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 2.5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'The pricing is for prompt with context length > 200k tokens',
    notesUrl: 'https://ai.google.dev/gemini-api/docs/pricing',
    reasoning: true,
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Pro Preview (05-06)',
  },
  [ModelEnum['gemini-2.5-pro-preview-05-06']]: {
    name: 'Gemini 2.5 Pro Preview (05-06)',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 2.5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'The pricing is for prompt with context length > 200k tokens',
    notesUrl: 'https://ai.google.dev/gemini-api/docs/pricing',
    reasoning: true,
    recommendedForCoding: true,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Pro Preview (06-05)',
    openRouterModelId: 'google/gemini-2.5-pro-preview',
  },
  [ModelEnum['gemini-2.5-pro-preview-06-05']]: {
    name: 'Gemini 2.5 Pro Preview (06-05)',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 2.5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'The pricing is for prompt with context length > 200k tokens',
    notesUrl: 'https://ai.google.dev/gemini-api/docs/pricing',
    reasoning: true,
    recommendedForCoding: true,
    recommendedForWriting: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Pro',
    openRouterModelId: 'google/gemini-2.5-pro-preview',
  },
  [ModelEnum['gemini-2.5-pro']]: {
    name: 'Gemini 2.5 Pro',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 2.5,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    notes: 'The pricing is for prompt with context length > 200k tokens',
    notesUrl: 'https://ai.google.dev/gemini-api/docs/pricing',
    reasoning: true,
    recommendedForCoding: true,
    recommendedForWriting: true,
    supportsImageInput: true,
    legacy: false,
    openRouterModelId: 'google/gemini-2.5-pro',
  },
  [ModelEnum['gemini-2.5-flash-preview-04-17']]: {
    name: 'Gemini 2.5 Flash Preview',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 0,
    pricePerMillionOutputTokens: 0,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      "Google's best model in terms of price-performance, offering well-rounded capabilities",
    reasoning: true,
    small: true,
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Flash Preview (05-20)',
  },
  [ModelEnum['gemini-2.5-flash-preview-05-20']]: {
    name: 'Gemini 2.5 Flash Preview (05-20)',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 0,
    pricePerMillionOutputTokens: 0,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      "Google's best model in terms of price-performance, offering well-rounded capabilities",
    reasoning: true,
    small: true,
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: true,
    legacyReason: 'Superseded by Gemini 2.5 Flash',
    openRouterModelId: 'google/gemini-2.5-flash-preview-05-20',
  },
  [ModelEnum['gemini-2.5-flash']]: {
    name: 'Gemini 2.5 Flash',
    provider: AI_PROVIDERS.GOOGLE,
    contextWindowTokenLimit: 1048576,
    outputTokenLimit: 65536,
    pricePerMillionInputTokens: 0.3,
    pricePerMillionOutputTokens: 2.5,
    tokenizerId: 'Xenova/gpt-4o',
    notes:
      "Google's best model in terms of price-performance, offering well-rounded capabilities",
    reasoning: true,
    small: true,
    recommendedForCoding: false,
    supportsImageInput: true,
    legacy: false,
    openRouterModelId: 'google/gemini-2.5-flash',
  },
  // --- xAI ---
  [ModelEnum['grok-4']]: {
    name: 'Grok 4',
    provider: AI_PROVIDERS.XAI,
    contextWindowTokenLimit: 256000,
    outputTokenLimit: 32768, // no info
    pricePerMillionInputTokens: 3,
    pricePerMillionOutputTokens: 15,
    tokenizerId: 'Xenova/gpt-4o',
    reasoning: true,
    supportsImageInput: true,
    recommendedForCoding: false,
    recommendedForWriting: false,
    legacy: false,
  },
};
/**
 * Looks up a model in ModelInfoMap and returns its full ModelInfo
 * record with the `id` field attached.
 *
 * @param id The model identifier to look up.
 */
export function getModelInfoWithId(id: ModelLike): ModelInfo {
  const info = ModelInfoMap[id];
  return createModelInfo(id, info);
}
/**
 * Returns every entry of ModelInfoMap as a full ModelInfo record,
 * with each map key attached as the record's `id`.
 */
export function getAllModelsWithIds(): ModelInfo[] {
  const models: ModelInfo[] = [];
  for (const [id, info] of Object.entries(ModelInfoMap)) {
    models.push(createModelInfo(id, info));
  }
  return models;
}