@aj-archipelago/cortex

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.
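As a quick illustration of what "GraphQL API for AI" means in practice, the sketch below posts a query to a running Cortex instance over plain HTTP (Node 18+ for the built-in fetch). The endpoint URL, the pathway name (translate), and the result field are assumptions made for the example; the pathways actually available depend on the deployment and its schema.

// Minimal sketch of calling Cortex over GraphQL. The endpoint URL, the
// "translate" pathway, and the "result" field are assumed for illustration;
// check the schema of your deployment for the real names.
const CORTEX_URL = "http://localhost:4000/graphql"; // assumed local endpoint

async function translate(text: string, to: string): Promise<string> {
  const response = await fetch(CORTEX_URL, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      query: `query Translate($text: String, $to: String) {
                translate(text: $text, to: $to) { result }
              }`,
      variables: { text, to },
    }),
  });
  // Shape of the response body is assumed to follow the standard
  // GraphQL { data: ... } envelope.
  const body = (await response.json()) as {
    data: { translate: { result: string } };
  };
  return body.data.translate.result;
}

translate("Hello, world", "es").then(console.log).catch(console.error);

The configuration shown below defines the models Cortex can route requests to, along with their endpoints, authentication headers, rate limits, and token limits: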

{ "defaultModelName": "oai-gpturbo", "models": { "azure-translate": { "type": "AZURE-TRANSLATE", "url": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0", "headers": { "Ocp-Apim-Subscription-Key": "{{ARCHIPELAGO_TRANSLATE_KEY}}", "Ocp-Apim-Subscription-Region": "eastus", "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 2000 }, "gemini-pro-chat": { "type": "GEMINI-CHAT", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 32768, "maxReturnTokens": 8192, "supportsStreaming": true }, "gemini-pro-vision": { "type": "GEMINI-VISION", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-pro-vision:streamGenerateContent", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 32768, "maxReturnTokens": 2048, "supportsStreaming": true }, "gemini-pro-15-vision": { "type": "GEMINI-VISION", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-1.5-pro:streamGenerateContent", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 1048576, "maxReturnTokens": 2048, "supportsStreaming": true }, "gemini-pro-25-vision": { "type": "GEMINI-VISION", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-2.5-pro:streamGenerateContent", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 1048576, "maxReturnTokens": 65536, "supportsStreaming": true }, "claude-3-haiku-vertex": { "type": "CLAUDE-3-VERTEX", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/anthropic/models/claude-3-haiku@20240307", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 200000, "maxReturnTokens": 2048, "supportsStreaming": true }, "claude-35-sonnet-vertex": { "type": "CLAUDE-3-VERTEX", "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet@20240229", "headers": { "Content-Type": "application/json" }, "requestsPerSecond": 10, "maxTokenLength": 200000, "maxReturnTokens": 2048, "supportsStreaming": true }, "oai-gpturbo": { "type": "OPENAI-CHAT", "url": "https://api.openai.com/v1/chat/completions", "headers": { "Authorization": "Bearer {{OPENAI_API_KEY}}", "Content-Type": "application/json" }, "params": { "model": "gpt-3.5-turbo" }, "requestsPerSecond": 10, "maxTokenLength": 8192 }, "oai-gpt4": { "type": "OPENAI-CHAT", "url": "https://api.openai.com/v1/chat/completions", "headers": { "Authorization": "Bearer {{OPENAI_API_KEY}}", "Content-Type": "application/json" }, "params": { "model": "gpt-4" }, "requestsPerSecond": 10, "maxTokenLength": 8192 }, "oai-gpt4-32": { "type": "OPENAI-CHAT", "url": "https://api.openai.com/v1/chat/completions", "headers": { "Authorization": "Bearer {{OPENAI_API_KEY}}", "Content-Type": "application/json" }, "params": { "model": "gpt-4-32" }, "requestsPerSecond": 10, "maxTokenLength": 32768 }, "oai-gpt4o": { "type": "OPENAI-VISION", "url": "https://api.openai.com/v1/chat/completions", "headers": { "Authorization": "Bearer 
{{OPENAI_API_KEY}}", "Content-Type": "application/json" }, "params": { "model": "gpt-4o" }, "requestsPerSecond": 10, "maxTokenLength": 131072, "maxReturnTokens": 4096, "supportsStreaming": true }, "local-llama13B": { "type": "LOCAL-CPP-MODEL", "executablePath": "../llm/llama.cpp/main", "args": [ "-m", "../llm/llama.cpp/models/13B/ggml-model-q4_0.bin", "--repeat_penalty", "1.0", "--keep", "0", "-t", "8", "--mlock" ], "requestsPerSecond": 10, "maxTokenLength": 1024 } }, "enableCache": false, "enableRestEndpoints": false, "subscriptionKeepAlive": 10000 }