UNPKG

@kluai/gateway

Version:

OpenAI-compatible gateway.

1,870 lines (1,863 loc) 83.4 kB
"use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _nullishCoalesce(lhs, rhsFn) { if (lhs != null) { return lhs; } else { return rhsFn(); } }// src/schema.ts var ProviderBase = { openai: "https://api.openai.com", anthropic: "https://api.anthropic.com", perplexity: "https://api.perplexity.ai", replicate: "https://openai-proxy.replicate.com", cloudflare: "https://api.cloudflare.com/client/v4/accounts/", mistral: "https://api.mistral.ai", groq: "https://api.groq.com/openai/", google: "https://generativelanguage.googleapis.com/v1beta/models/", gcp_vertex: "https://us-central1-vertex-ai.cloud.google.com", cohere: "https://api.cohere.ai/v1/chat", ai21: "https://api.ai21.com/studio", together: "https://api.together.xyz" }; var AvailableModels = { "gpt-3.5-turbo": { format: "openai", type: "chat", inputCost: 15e-7, outputCost: 2e-6 }, "gpt-3": { format: "openai", type: "chat", inputCost: 15e-7, outputCost: 2e-6 }, "gpt-35-turbo": { format: "openai", type: "chat", inputCost: 15e-7, outputCost: 2e-6 }, "gpt-3.5-turbo-1106": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "gpt-3.5-turbo-16k": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 4e-6 }, "gpt-35-turbo-16k": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 4e-6 }, "gpt-4": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 6e-5 }, "gpt-4o": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 6e-5 }, "gpt-4o-2024-05-13": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 6e-5 }, "gpt-4o-2024-08-06": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 6e-5 }, "gpt-4-1106-preview": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, // need cost per token "gpt-4-vision-preview": { format: "openai", type: "multi", inputCost: 1, outputCost: 1 }, // need cost per token "gpt-4-32k": { format: "openai", type: "chat", inputCost: 6e-5, outputCost: 12e-5 }, "gpt-3.5-turbo-0613": { format: 
"openai", type: "chat", inputCost: 15e-7, outputCost: 2e-6 }, "gpt-3.5-turbo-16k-0613": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 4e-6 }, "gpt-3.5-turbo-0301": { format: "openai", type: "chat", inputCost: 15e-7, outputCost: 2e-6 }, "gpt-4-0613": { format: "openai", type: "chat", inputCost: 3e-6, outputCost: 6e-5 }, "gpt-4-32k-0613": { format: "openai", type: "chat", inputCost: 6e-5, outputCost: 12e-5 }, "gpt-4-0314": { format: "openai", type: "chat", inputCost: 6e-5, outputCost: 12e-5 }, "gpt-4-32k-0314": { format: "openai", type: "chat", inputCost: 6e-5, outputCost: 12e-5 }, "gpt-3.5-turbo-instruct": { format: "openai", type: "completion", inputCost: 6e-5, outputCost: 12e-5 }, // need to update "text-davinci-003": { format: "openai", type: "completion", inputCost: 6e-5, outputCost: 12e-5 }, // need to update "claude-2": { format: "anthropic", type: "chat", inputCost: 1102e-8, outputCost: 3268e-8 }, "claude-instant-1": { format: "anthropic", type: "chat", inputCost: 163e-8, outputCost: 551e-8 }, "claude-2.0": { format: "anthropic", type: "chat", inputCost: 1102e-8, outputCost: 3268e-8 }, "claude-2.1": { format: "anthropic", type: "chat", inputCost: 1102e-8, outputCost: 3268e-8 }, // need new pricing "claude-instant-1.2": { format: "anthropic", type: "chat", inputCost: 1102e-8, outputCost: 3268e-8 }, "claude-3-opus-20240229": { format: "anthropic", type: "chat", inputCost: 15e-6, outputCost: 75e-6 }, "claude-3-sonnet-20240229": { format: "anthropic", type: "chat", inputCost: 3e-6, outputCost: 15e-6 }, "claude-3-haiku-20240307": { format: "anthropic", type: "chat", inputCost: 25e-8, outputCost: 125e-8 }, "claude-3-5-sonnet-20240620": { format: "anthropic", type: "chat", inputCost: 3e-6, outputCost: 15e-6 }, "claude-3-5-haiku-20241022": { format: "anthropic", type: "chat", inputCost: 3e-6, outputCost: 15e-6 }, "claude-3-5-sonnet-20241022": { format: "anthropic", type: "chat", inputCost: 3e-6, outputCost: 15e-6 }, "meta/llama-2-70b-chat": { format: 
"openai", type: "chat", inputCost: 1, outputCost: 1 }, "llama-2-70b-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "llama-2-13b-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "codellama-34b-instruct": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "mistral-7b-instruct": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "openhermes-2-mistral-7b": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "openhermes-2.5-mistral-7b": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "pplx-7b-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "pplx-70b-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "pplx-7b-online": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "pplx-70b-online": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "sonnar-small-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "sonar-small-online": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "sonnar-medium-chat": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "sonar-medium-online": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 }, "chat-bison-001": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, "text-bison-001": { format: "google", type: "completion", inputCost: 1, outputCost: 1 }, "embedding-gecko-001": { format: "google", type: "embedding", inputCost: 1, outputCost: 1 }, "gemini-1.0-pro": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, "gemini-1.5-pro-exp-0801": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, "gemini-1.0-pro-001": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, "gemini-1.0-pro-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, "gemini-1.5-pro-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }, 
// (AvailableModels entries, continued from the object literal opened earlier in this file.)
  "gemini-1.0-pro-vision-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-pro-2m-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-pro": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-pro-vision": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "embedding-001": { format: "google", type: "embedding", inputCost: 1, outputCost: 1 },
  aqa: { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "command-r": { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  "command-r-plus": { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  command: { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  "command-nightly": { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  "command-light": { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  "command-light-nightly": { format: "cohere", type: "chat", inputCost: 1, outputCost: 1 },
  "codestral-mamba-latest": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 },
  "codestral-mamba-2407": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 },
  "open-mistral-nemo-2407": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 },
  "open-mistral-nemo": { format: "openai", type: "chat", inputCost: 1, outputCost: 1 },
  "gpt-4-0125-preview": { format: "openai", type: "chat", inputCost: 0.01, outputCost: 0.03 },
  "gpt-4-turbo-preview": { format: "openai", type: "chat", inputCost: 0.01, outputCost: 0.03 },
  "gpt-4-turbo": { format: "openai", type: "chat", inputCost: 0.01, outputCost: 0.03 },
  "gpt-4-1106-vision-preview": { format: "openai", type: "multi", inputCost: 0.02, outputCost: 0.03 },
  "gpt-4-turbo-2024-04-09": { format: "openai", type: "chat", inputCost: 0.01, outputCost: 0.03 },
  "gpt-3.5": { format: "openai", type: "chat", inputCost: 15e-4, outputCost: 2e-3 },
  "gpt-3.5-turbo-0125": { format: "openai", type: "chat", inputCost: 5e-4, outputCost: 15e-4 },
  "gpt-3.5-16k": { format: "openai", type: "chat", inputCost: 3e-3, outputCost: 4e-3 },
  "claude-instant-v1": { format: "anthropic", type: "chat", inputCost: 163e-5, outputCost: 551e-5 },
  "claude-instant-v1-100k": { format: "anthropic", type: "chat", inputCost: 163e-5, outputCost: 551e-5 },
  "claude-v1": { format: "anthropic", type: "chat", inputCost: 163e-5, outputCost: 551e-5 },
  "claude-v1-100k": { format: "anthropic", type: "chat", inputCost: 163e-5, outputCost: 551e-5 },
  "open-mistral-7b": { format: "openai", type: "chat", inputCost: 25e-5, outputCost: 25e-5 },
  "open-mixtral-8x7b": { format: "openai", type: "chat", inputCost: 7e-4, outputCost: 7e-4 },
  "mistral-small-latest": { format: "openai", type: "chat", inputCost: 2e-3, outputCost: 6e-3 },
  "mistral-small-2312": { format: "openai", type: "chat", inputCost: 2e-3, outputCost: 6e-3 },
  "mistral-small-2402": { format: "openai", type: "chat", inputCost: 2e-3, outputCost: 6e-3 },
  "mistral-medium-latest": { format: "openai", type: "chat", inputCost: 27e-4, outputCost: 81e-4 },
  "mistral-medium-2312": { format: "openai", type: "chat", inputCost: 27e-4, outputCost: 81e-4 },
  "mistral-large-latest": { format: "openai", type: "chat", inputCost: 8e-3, outputCost: 0.024 },
  "mistral-large-2402": { format: "openai", type: "chat", inputCost: 8e-3, outputCost: 0.024 },
  "mistral-tiny": { format: "openai", type: "chat", inputCost: 25e-5, outputCost: 25e-5 },
  "llama-70b-4096": { format: "openai", type: "chat", inputCost: 7e-4, outputCost: 8e-4 },
  "gemma-7b-it": { format: "openai", type: "chat", inputCost: 1e-4, outputCost: 1e-4 },
  // NOTE(review): unlike every other entry in this table, this one carries no
  // inputCost/outputCost — confirm that cost consumers tolerate undefined.
  "mixtral-8x7b-32768": { format: "openai", type: "chat" },
  "gemini-1.5-pro-001": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-pro-002": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-001": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-002": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-8b": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-8b-001": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-8b-latest": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-8b-exp-0827": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-1.5-flash-8b-exp-0924": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-2.0-flash-exp": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-exp-1206": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-exp-1121": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "gemini-exp-1114": { format: "google", type: "chat", inputCost: 1, outputCost: 1 },
  "learnlm-1.5-pro-experimental": { format: "google", type: "chat", inputCost: 1, outputCost: 1 }
};

// Maps a model identifier to the provider key that serves it. getProvider()
// consults this table before falling back to AvailableModels[model].format.
// The same model name may also appear in AvailableModels with a different
// `format` (e.g. "gemma-7b-it"); this table wins in getProvider().
// The literal continues past the end of this block.
var ModelProviderMap = {
  "jamba-instruct-preview": "ai21",
  "ai21.j2-grande-instruct": "bedrock",
  "ai21.j2-jumbo-instruct": "bedrock",
  "ai21.j2-mid": "bedrock",
  "ai21.j2-mid-v1": "bedrock",
  "ai21.j2-ultra": "bedrock",
  "ai21.j2-ultra-v1:0:8k": "bedrock",
  "amazon.titan-text-express-v1": "bedrock",
  "amazon.titan-text-express-v1:0:8k": "bedrock",
  "amazon.titan-text-lite-v1": "bedrock",
  "amazon.titan-text-lite-v1:0:4k": "bedrock",
  "amazon.titan-text-premier-v1:0": "bedrock",
  "amazon.titan-tg1-large": "bedrock",
  "anthropic.claude-3-haiku-20240307-v1:0:200k": "bedrock",
  "anthropic.claude-3-haiku-20240307-v1:0": "bedrock",
  "anthropic.claude-3-haiku-20240307-v1:0:48k": "bedrock",
  "anthropic.claude-3-sonnet-20240229-v1:0": "bedrock",
  "anthropic.claude-3-sonnet-20240229-v1:0:28k": "bedrock",
  "anthropic.claude-3-sonnet-20240229-v1:0:200k": "bedrock",
  "anthropic.claude-3-opus-20240229-v1:0": "bedrock",
  "anthropic.claude-instant-v1": "bedrock",
  "anthropic.claude-instant-v1:2:100k": "bedrock",
  "anthropic.claude-v2": "bedrock",
  "anthropic.claude-v2:0:18k": "bedrock",
  "anthropic.claude-v2:0:100k": "bedrock",
  "anthropic.claude-v2:1": "bedrock",
  "anthropic.claude-v2:1:18k": "bedrock",
  "anthropic.claude-v2:1:200k": "bedrock",
  "cohere.command-light-text-v14": "bedrock",
  "cohere.command-light-text-v14:7:4k": "bedrock",
  "cohere.command-r-plus-v1:0": "bedrock",
  "cohere.command-r-v1:0": "bedrock",
  "cohere.command-text-v14": "bedrock",
  "cohere.command-text-v14:7:4k": "bedrock",
  "meta.llama2-13b-chat-v1": "bedrock",
  "meta.llama2-13b-chat-v1:0:4k": "bedrock",
  "meta.llama2-13b-v1": "bedrock",
  "meta.llama2-13b-v1:0:4k": "bedrock",
  "meta.llama2-70b-chat-v1": "bedrock",
  "meta.llama2-70b-chat-v1:0:4k": "bedrock",
  "meta.llama2-70b-v1": "bedrock",
  "meta.llama2-70b-v1:0:4k": "bedrock",
  "meta.llama3-70b-instruct-v1:0": "bedrock",
  "meta.llama3-8b-instruct-v1:0": "bedrock",
  "meta.llama3-1-8b-instruct-v1:0": "bedrock",
  "meta.llama3-1-70b-instruct-v1:0": "bedrock",
  "meta.llama3-1-405b-instruct-v1:0": "bedrock",
  "mistral.mixtral-8x7b-instruct-v0:1": "bedrock",
  "mistral.mistral-7b-instruct-v0:2": "bedrock",
  "mistral.mistral-large-2402-v1:0": "bedrock",
  "gemma2-9b-it": "groq",
  "gemma-7b-it": "groq",
  "llama-3.1-405b-reasoning": "groq",
  "llama-3.1-70b-versatile": "groq",
  "llama-3.1-8b-instant": "groq",
  "llama3-70b-8192": "groq",
  "llama3-8b-8192": "groq",
  "llama3-groq-70b-8192-tool-use-preview": "groq",
  "llama3-groq-8b-8192-tool-use-preview": "groq",
  "mixtral-8x7b-32768": "groq",
  "whisper-large-v3": "groq",
  "chat-bison-001": "google",
  "text-bison-001": "google",
  "embedding-gecko-001": "google",
  "gemini-1.0-pro": "google",
  "gemini-1.0-pro-001": "google",
  "gemini-1.0-pro-latest": "google",
  "gemini-1.0-pro-vision-latest": "google",
  "gemini-1.5-pro-2m-latest": "google",
  "gemini-1.5-pro-001": "google",
  "gemini-1.5-pro": "google",
  "gemini-1.5-flash-001": "google",
  "gemini-1.5-flash": "google",
  "gemini-pro": "google",
  "gemini-pro-vision": "google",
  "embedding-001": "google",
  "gemini-1.5-pro-exp-0801": "google",
  "gemini-1.5-flash-8b-exp-0827": "google",
  "gemini-1.5-flash-exp-0827": "google",
  "gemini-1.5-pro-exp-0827": "google",
  "gemini-1.5-pro-002": "google",
  "gemini-1.5-flash-002": "google",
  aqa: "google",
  "llama-3-sonar-small-32k-chat": "perplexity",
  "llama-3-sonar-small-32k-online": "perplexity",
  "llama-3-sonar-large-32k-chat": "perplexity",
  "llama-3-sonar-large-32k-online": "perplexity",
  "llama-3.1-sonar-small-128k-online": "perplexity",
  "llama-3.1-sonar-small-128k-chat": "perplexity",
  "llama-3.1-sonar-large-128k-online": "perplexity",
  "llama-3.1-sonar-large-128k-chat": "perplexity",
  "llama-3-8b-instruct": "perplexity",
  "llama-3-70b-instruct": "perplexity",
  "sonar-small-chat": "perplexity",
  "sonar-small-online": "perplexity",
  "sonar-medium-chat": "perplexity",
  "sonar-medium-online": "perplexity",
  "open-mistral-7b": "mistral",
  "mistral-tiny-2312": "mistral",
  "mistral-tiny": "mistral",
  "open-mixtral-8x7b": "mistral",
  "open-mixtral-8x22b": "mistral",
  "open-mixtral-8x22b-2404": "mistral",
  "mistral-small-2312": "mistral",
  "mistral-small": "mistral",
  "mistral-small-2402": "mistral",
  "mistral-small-latest": "mistral",
  "mistral-medium-latest": "mistral",
  "mistral-medium-2312": "mistral",
  "mistral-medium": "mistral",
  "mistral-large-latest": "mistral",
  "mistral-large-2402": "mistral",
  "mistral-large-2407": "mistral",
  "mistral-large": "mistral",
  "mistral-embed": "mistral",
  "mixtral-8x7b": "mistral",
  "codestral-mamba-latest": "mistral",
  "llama-3-groq-70b-8192-tool-use-preview": "groq",
  "llama-3-groq-8b-8192-tool-use-preview": "groq",
  "codestral-mamba-2407": "mistral",
  "open-mistral-nemo-2407": "mistral",
  "open-mistral-nemo": "mistral",
  "zero-one-ai/Yi-34B-Chat": "together",
  "allenai/OLMo-7B-Instruct": "together",
  "allenai/OLMo-7B-Twin-2T": "together",
  "allenai/OLMo-7B": "together",
  "Austism/chronos-hermes-13b": "together",
  "cognitivecomputations/dolphin-2.5-mixtral-8x7b": "together",
  "databricks/dbrx-instruct": "together",
  "deepseek-ai/deepseek-coder-33b-instruct": "together",
  "deepseek-ai/deepseek-llm-67b-chat": "together",
  "garage-bAInd/Platypus2-70B-instruct": "together",
  "google/gemma-2b-it": "together",
  "google/gemma-7b-it": "together",
  "Gryphe/MythoMax-L2-13b": "together",
  "lmsys/vicuna-13b-v1.5": "together",
  "lmsys/vicuna-7b-v1.5": "together",
  "codellama/CodeLlama-13b-Instruct-hf": "together",
  "codellama/CodeLlama-34b-Instruct-hf": "together",
  "codellama/CodeLlama-70b-Instruct-hf": "together",
  "codellama/CodeLlama-7b-Instruct-hf": "together",
  "meta-llama/Llama-2-70b-chat-hf": "together",
  "meta-llama/Llama-2-13b-chat-hf": "together",
  "meta-llama/Llama-2-7b-chat-hf": "together",
  "meta-llama/Llama-3-8b-chat-hf": "together",
  "meta-llama/Llama-3-70b-chat-hf": "together",
  "mistralai/Mistral-7B-Instruct-v0.1": "together",
  "mistralai/Mistral-7B-Instruct-v0.2": "together",
  "mistralai/Mistral-7B-Instruct-v0.3": "together",
  "mistralai/Mixtral-8x7B-Instruct-v0.1": "together",
  "mistralai/Mixtral-8x22B-Instruct-v0.1": "together",
  "NousResearch/Nous-Capybara-7B-V1p9": "together",
  "NousResearch/Nous-Hermes-2-Mistral-7B-DPO": "together",
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": "together",
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT": "together",
  "NousResearch/Nous-Hermes-llama-2-7b": "together",
  "NousResearch/Nous-Hermes-Llama2-13b": "together",
  "NousResearch/Nous-Hermes-2-Yi-34B": "together",
  "openchat/openchat-3.5-1210": "together",
  "Open-Orca/Mistral-7B-OpenOrca": "together",
  "Qwen/Qwen1.5-0.5B-Chat": "together",
  "Qwen/Qwen1.5-1.8B-Chat": "together",
  "Qwen/Qwen1.5-4B-Chat": "together",
  "Qwen/Qwen1.5-7B-Chat": "together",
  "Qwen/Qwen1.5-14B-Chat": "together",
  "Qwen/Qwen1.5-32B-Chat": "together",
  "Qwen/Qwen1.5-72B-Chat": "together",
  "Qwen/Qwen1.5-110B-Chat": "together",
  "Qwen/Qwen2-72B-Instruct": "together",
  "snorkelai/Snorkel-Mistral-PairRM-DPO": "together",
"Snowflake/snowflake-arctic-instruct": "together", "togethercomputer/alpaca-7b": "together", "teknium/OpenHermes-2-Mistral-7B": "together", "teknium/OpenHermes-2p5-Mistral-7B": "together", "togethercomputer/Llama-2-7B-32K-Instruct": "together", "togethercomputer/RedPajama-INCITE-Chat-3B-v1": "together", "togethercomputer/RedPajama-INCITE-7B-Chat": "together", "togethercomputer/StripedHyena-Nous-7B": "together", "Undi95/ReMM-SLERP-L2-13B": "together", "Undi95/Toppy-M-7B": "together", "WizardLM/WizardLM-13B-V1.2": "together", "upstage/SOLAR-10.7B-Instruct-v1.0": "together", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": "together", "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": "together", "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": "together", "meta-llama/Meta-Llama-3-8B-Instruct-Turbo": "together", "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": "together", "meta-llama/Meta-Llama-3-8B-Instruct-Lite": "together", "meta-llama/Meta-Llama-3-70B-Instruct-Lite": "together" }; // src/providers/ai21.ts function convertAI21StopReasonToOpenAIStopReason(reason) { if (reason === "endoftext") { return "stop"; } if (reason === "max_tokens") { return "length"; } return "stop"; } function convertAI21GenerationResponse(response, model) { const completion = response.completions[0]; const currentTimeInSeconds = Math.floor(Date.now() / 1e3); return { id: response.id, choices: [ { finish_reason: convertAI21StopReasonToOpenAIStopReason(completion.finishReason.reason) || "stop", index: 0, message: { role: "assistant", content: completion.data.text.trimStart() } } ], created: currentTimeInSeconds, usage: { prompt_tokens: response.prompt.tokens.length, completion_tokens: completion.data.tokens.length, total_tokens: completion.data.tokens.length + response.prompt.tokens.length }, model, object: "chat.completion" }; } async function fetchAI21Request(body, headers, url) { const response = await fetch(url, { method: "POST", headers, body: JSON.stringify(body), keepalive: true }); return { 
stream: response.body, response }; } // src/utils.ts var _eventsourceparser = require('eventsource-parser'); function getCompletionFromStream(stream) { if (!stream) return { raw_ouput: "", completion: "" }; const events = stream.split("\n\n").filter((line) => line.startsWith("data: ")); let completion = ""; const raw_output = []; for (const event of events) { const json = event.replace("data: ", ""); try { const parsed = JSON.parse(json); raw_output.push(parsed); completion += parsed.choices[0].delta.content || ""; } catch (e) { continue; } } const strinfied_response = JSON.stringify({ stream: raw_output }); return { raw_output: strinfied_response, completion }; } async function logToKlu(body, kluApiKey, output, stream = false, actionGuid, dataGuid) { const baseUrl = "https://api.klu.ai/v1/data/"; if (kluApiKey === void 0) { return; } const url = dataGuid !== void 0 ? `${baseUrl}${dataGuid}` : baseUrl; let payload; if (dataGuid !== void 0) { payload = { raw_llm_response: stream ? getCompletionFromStream(output).raw_output : output, raw_llm_request: body }; } else { const userMessages = body.messages.filter( (message) => message.role === "user" ); const lastUserMessage = userMessages[userMessages.length - 1]; let outputLog; let raw_llm_response; if (stream) { const { raw_output, completion } = getCompletionFromStream(output); raw_llm_response = raw_output || ""; outputLog = completion; } else { const responseJson = JSON.parse(output); outputLog = responseJson.choices[0].message.content; raw_llm_response = JSON.stringify(responseJson); } payload = { action: actionGuid, full_prompt_sent: JSON.stringify(body["messages"]), output: outputLog, input: lastUserMessage.content, model: body["model"], model_provider: "OpenAI", metadata: { source: "Gateway" }, raw_llm_response, raw_llm_request: body }; } const requestData = { method: dataGuid !== void 0 ? 
"PUT" : "POST", headers: { "Content-Type": "application/json", Authorization: `Bearer ${kluApiKey}` }, body: JSON.stringify(payload) }; const response = await fetch(url, requestData); } function getProvider(model, url, headers) { var _a; let provider = "openai"; if (url.includes("deployment") && headers["deployment-url"]) { return "azure"; } if ((_a = headers["deployment-url"]) == null ? void 0 : _a.includes("googleapis.com")) { return "gcp_vertex"; } if (headers["deployment-url"]) { return "custom-deployment"; } if (model.includes("@cf") || model.includes("@hf")) { return "cloudflare"; } if (headers["x-amzn-access-key"] || headers["x-amzn-secret-key"] || headers["x-amzn-region"]) { return "bedrock"; } if ("google" == headers["x-model-provider"]) { return "google"; } if (model in ModelProviderMap) { provider = ModelProviderMap[model]; } else if (model in AvailableModels) { provider = AvailableModels[model].format; } return provider; } function getBaseURL(provider, headers) { if (headers["x-amzn-region"]) { return `https://bedrock-runtime.${headers["x-amzn-region"]}.amazonaws.com/model/`; } else if (!["azure", "gcp_vertex"].includes(provider) && provider !== "custom-deployment") { return ProviderBase[provider] || ProviderBase["openai"]; } else { return headers["deployment-url"]; } } function flattenChunks(allChunks) { const flatArray = new Uint8Array(allChunks.reduce((a, b) => a + b.length, 0)); for (let i = 0, offset = 0; i < allChunks.length; i++) { flatArray.set(allChunks[i], offset); offset += allChunks[i].length; } return new TextDecoder().decode(flatArray); } function createEventStreamTransformer(customParser) { const textDecoder = new TextDecoder(); let eventSourceParser; function isDoneEvent(event) { return "data" in event && event.type === "event" && event.data === "[DONE]" || event.event === "done"; } function enqueueData(controller, data) { controller.enqueue(new TextEncoder().encode("data: " + data + "\n\n")); } function finish(controller) { 
enqueueData(controller, "[DONE]"); controller.terminate(); } return new TransformStream({ async start(controller) { eventSourceParser = _eventsourceparser.createParser.call(void 0, (event) => { if (isDoneEvent(event)) { finish(controller); return; } if ("data" in event) { const parsedMessage = customParser(event.data); if (parsedMessage.data !== null) { enqueueData(controller, parsedMessage.data); } if (parsedMessage.finished) { finish(controller); } } } ); }, transform(chunk) { eventSourceParser.feed(textDecoder.decode(chunk)); } }); } function getMimeType(base64) { const signatures = { JVBERi0: "application/pdf", R0lGODdh: "image/gif", R0lGODlh: "image/gif", iVBORw0KGgo: "image/png", UklGR: "image/webp", "/9j/": "image/jpeg" }; for (const sign in signatures) { if (base64.startsWith(sign)) return signatures[sign]; } return "image/png"; } function returnProviderKey(provider, env) { if (provider === "openai") { return env.OPENAI_API_KEY; } else if (provider === "anthropic") { return env.ANTHROPIC_API_KEY; } else if (provider === "groq") { return env.GROQ_API_KEY; } else if (provider === "mistral") { return env.MISTRAL_API_KEY; } else if (provider === "perplexity") { return env.PERPLEXITY_API_KEY; } else if (provider === "cohere") { return env.COHERE_API_KEY; } else if (provider === "ai21") { return env.AI21_API_KEY; } else if (provider === "google") { return env.GOOGLE_API_KEY; } else if (provider === "together") { return env.TOGETHER_API_KEY; } } // src/providers/anthropic.ts var openaiToAnthropicParamMap = { temperature: "temperature", top_p: "top_p", max_tokens: "max_tokens_to_sample", top_k: "top_k", stream: "stream" }; function convertOpenAItoAnthropicParams(openaiParams) { const anthropicParams = {}; for (const [key, value] of Object.entries(openaiParams)) { if (key in openaiToAnthropicParamMap) { anthropicParams[openaiToAnthropicParamMap[key]] = value; } } if (!("temperature" in anthropicParams)) { anthropicParams["temperature"] = 0.5; } if 
(!("max_tokens_to_sample" in anthropicParams)) { anthropicParams["max_tokens_to_sample"] = 500; } return anthropicParams; } function anthropicFinishReason(stop_reason) { if (stop_reason === "stop_reason" || stop_reason === "end_turn" || stop_reason === "stop_sequence") { return "stop"; } else if (stop_reason === "max_tokens") { return "length"; } else if (stop_reason === "tool_use") { return "tool_calls"; } else { return null; } } function openAItoAnthropicPrompt(messages) { return "\n\n" + messages.map( ({ content, role }) => `${role === "user" || role === "human" ? "Human" : "Assistant"}: ${content}` ).join("\n\n") + "\n\nAssistant:"; } function anthropicCompletionToOpenAICompletion(completion) { const currentTimeInSeconds = Math.floor(Date.now() / 1e3); return { id: completion.log_id, choices: [ { finish_reason: anthropicFinishReason(completion.stop_reason) || "stop", index: 0, message: { role: "assistant", content: completion.completion.trimStart() } } ], created: currentTimeInSeconds, model: completion.model, object: "chat.completion" }; } function anthropicMessageResponseToOpenAICompletion(completion) { const currentTimeInSeconds = Math.floor(Date.now() / 1e3); return { id: `${completion.id}`, object: "chat.completion", created: currentTimeInSeconds, model: completion.model, choices: completion.content.map((content, index) => { let message = { role: completion.role, content: content.text }; if (content.type === "tool_use") { message.tool_calls = [ { id: content.id, type: "function", function: { name: content.name, arguments: JSON.stringify(content.input) } } ]; } return { index, message, logprobs: null, finish_reason: anthropicFinishReason(completion.stop_reason) }; }), usage: { prompt_tokens: completion.usage.input_tokens, completion_tokens: completion.usage.output_tokens, total_tokens: completion.usage.input_tokens + completion.usage.output_tokens } }; } function anthroCompleteEventConverterToOpenAI(idx, event, model) { const completionContent = 
event.completion || ""; const finishReason = event.stop_reason ? anthropicFinishReason(event.stop_reason) : null; const currentTimeInSeconds = Math.floor(Date.now() / 1e3); return { event: { id: _nullishCoalesce(event.log_id, () => ( `id-${idx}`)), choices: [ { delta: { content: completionContent, role: "assistant" }, finish_reason: finishReason, index: idx } ], created: currentTimeInSeconds, model: _nullishCoalesce(_nullishCoalesce(event.model, () => ( model)), () => ( "")), object: "chat.completion.chunk" }, finished: event.stop_reason !== null }; } function anthroMessagesEventConverterToOpenAI(idx, event, model) { var _a, _b, _c, _d, _e, _f, _g, _h; console.log("event is ", JSON.stringify(event)); if ([ "content_block_delta", "message_delta", "input_json_delta", "content_block_start" ].includes(event.type) === false) { return { event: null, finished: false }; } const finishReason = ((_a = event.delta) == null ? void 0 : _a.stop_reason) ? anthropicFinishReason(event.delta.stop_reason) : null; const currentTimeInSeconds = Math.floor(Date.now() / 1e3); let msg; if (((_b = event.content_block) == null ? void 0 : _b.type) == "tool_use" && event.type === "content_block_start") { msg = { event: { id: `id-${idx}`, choices: [ { delta: { content: null, role: "assistant", tool_calls: [ { index: 0, id: event.content_block.id, type: "function", function: { name: event.content_block.name, arguments: "" } } ] }, finish_reason: finishReason, index: 0 } ], created: currentTimeInSeconds, model, object: "chat.completion.chunk" }, finished: !!((_c = event.delta) == null ? void 0 : _c.stop_reason) }; } else if ((event.type = "content_block_delta") && ((_d = event.delta) == null ? void 0 : _d.type) !== "input_json_delta") { const completionContent = ((_e = event.delta) == null ? 
void 0 : _e.text) || ""; msg = { event: { id: `id-${idx}`, choices: [ { delta: { content: completionContent, role: "assistant" }, finish_reason: finishReason, index: 0 } ], created: currentTimeInSeconds, model, object: "chat.completion.chunk" }, finished: !!((_f = event.delta) == null ? void 0 : _f.stop_reason) }; } else { msg = { event: { id: `id-${idx}`, choices: [ { delta: { tool_calls: [ { index: 0, function: { arguments: (_g = event.delta) == null ? void 0 : _g.partial_json } } ] }, finish_reason: finishReason, index: 0 } ], created: currentTimeInSeconds, model, object: "chat.completion.chunk" }, finished: !!((_h = event.delta) == null ? void 0 : _h.stop_reason) }; } console.log("msg transformed is ", JSON.stringify(msg)); console.log("\n\n"); return msg; } function openAItoAnthropicMessagesConverter(messages) { console.log(JSON.stringify(messages, null, 2)); return messages.map((message) => { if (message.role === "tool") { return { role: "user", content: [ { type: "tool_result", tool_use_id: message.tool_call_id, content: message.content } ] }; } else if (message.role === "assistant" && message.tool_calls && message.tool_calls.length > 0) { return { role: "assistant", content: message.tool_calls.map((c) => ({ type: "tool_use", id: c.id, name: c.function.name, input: JSON.parse(c.function.arguments) })) }; } else if (typeof message.content === "string") { return message; } else { return { content: message.content.map((c) => { if (typeof c === "object" && "image_url" in c) { if (typeof c.image_url === "object" && "url" in c.image_url) { return { source: { type: "base64", media_type: getMimeType(c.image_url.url), data: c.image_url.url }, type: "image" }; } else { return { source: { type: "base64", media_type: getMimeType(c.image_url), data: c.image_url }, type: "image" }; } } else { return c; } }), role: message.role }; } }); } function convertOAIInputIntoAnthropicInput(usesMessages, model, oaiMessages, tools, oaiParams, headers) { let anthroRequestPayload = {}; 
/**
 * Builds the Anthropic request payload from an OpenAI-style request.
 *
 * When `usesMessages` is false the payload targets the legacy completion
 * endpoint: the messages are flattened into a prompt and the parameters are
 * translated by `convertOpenAItoAnthropicParams`.
 *
 * When `usesMessages` is true the payload targets the Messages API: system
 * messages are removed from the conversation (the first one is sent as
 * `system`; any others are not forwarded), tools are re-shaped to
 * `{ name, description, input_schema }`, and every key of `oaiParams` that is
 * not in the forwarded-parameter list is removed from the payload.
 *
 * @param {boolean} usesMessages Whether to build a Messages API payload.
 * @param {string} model Model name, copied into the payload.
 * @param {Array<object>} oaiMessages OpenAI-format chat messages.
 * @param {Array<object>|undefined} tools OpenAI-format tool definitions.
 * @param {object} oaiParams Remaining OpenAI request parameters.
 * @param {object} headers Request headers; returned unchanged.
 * @returns {{anthroRequestPayload: object, headers: object}}
 */
function convertOAIInputIntoAnthropicInput(usesMessages, model, oaiMessages, tools, oaiParams, headers) {
  const transformedAnthropicParams = usesMessages
    ? oaiParams
    : convertOpenAItoAnthropicParams(oaiParams);

  if (!usesMessages) {
    const prompt = openAItoAnthropicPrompt(oaiMessages);
    return {
      anthroRequestPayload: { prompt, model, ...transformedAnthropicParams },
      headers
    };
  }

  // Request parameters that are passed through to the Messages API verbatim.
  const forwardedParams = new Set([
    "top_p",
    "temperature",
    "max_tokens",
    "top_k",
    "tools",
    "tool_choice",
    "stop_sequences",
    "stream",
    "system"
  ]);

  const systemMessage = oaiMessages.find((message) => message.role === "system");
  const conversation = oaiMessages.filter((message) => message.role !== "system");

  const anthroRequestPayload = {
    messages: openAItoAnthropicMessagesConverter(conversation),
    model,
    ...transformedAnthropicParams
  };
  if (systemMessage) {
    anthroRequestPayload["system"] = systemMessage.content;
  }
  if (tools && tools.length > 0) {
    anthroRequestPayload["tools"] = tools.map((tool) => ({
      name: tool.function.name,
      description: tool.function.description,
      input_schema: tool.function.parameters
    }));
  }
  for (const key of Object.keys(oaiParams)) {
    if (!forwardedParams.has(key)) {
      delete anthroRequestPayload[key];
    }
  }

  return { anthroRequestPayload, headers };
}

/**
 * Sends an OpenAI-style chat request to Anthropic and converts the response
 * back into OpenAI format.
 *
 * Models whose name contains "claude-3" use the Messages endpoint; every other
 * model uses the legacy completion endpoint. On a non-OK response the upstream
 * body is returned untouched so the caller can relay the error.
 *
 * @param {object} body OpenAI-style request body; it is not modified.
 * @param {object} headers Plain header object. It is mutated: the Anthropic
 *   headers (`accept`, `content-type`, `anthropic-version`, `host`,
 *   `x-api-key`) are written onto it before it is sent.
 * @param {string} url Ignored; the endpoint is derived from the model name.
 * @returns {Promise<{stream: ReadableStream, response: Response}>}
 * @throws {TypeError} When `headers.authorization` is missing.
 */
async function fetchAnthropicRequest(body, headers, url) {
  if (typeof headers.authorization !== "string") {
    throw new TypeError(
      "fetchAnthropicRequest: missing `authorization` header (expected \"Bearer <api key>\")"
    );
  }
  headers["accept"] = "application/json";
  headers["content-type"] = "application/json";
  headers["anthropic-version"] = "2023-06-01";
  // Overwrite any inbound host so the gateway's own host is never forwarded.
  headers["host"] = "api.anthropic.com";
  headers["x-api-key"] = headers.authorization.split(" ")[1];

  const usesMessages = body.model.includes("claude-3");
  const endpoint = usesMessages
    ? "https://api.anthropic.com/v1/messages"
    : "https://api.anthropic.com/v1/complete";

  // `n`, `seed`, `logit_bias` and `response_format` are pulled out here so they
  // are never forwarded; this also avoids mutating the caller's body.
  const {
    model,
    messages: oaiMessages,
    seed,
    logit_bias,
    response_format,
    tools,
    n,
    ...oaiParams
  } = body;

  const { anthroRequestPayload, headers: newHeaders } = convertOAIInputIntoAnthropicInput(
    usesMessages,
    model,
    oaiMessages,
    tools,
    oaiParams,
    headers
  );

  const response = await fetch(endpoint, {
    method: "POST",
    headers: newHeaders,
    body: JSON.stringify(anthroRequestPayload),
    keepalive: true
  });

  let stream = response.body || new ReadableStream({
    start(controller) {
      controller.close();
    }
  });

  if (!response.ok) {
    return { stream, response };
  }

  if (!anthroRequestPayload.stream) {
    // Non-streaming: buffer the whole body, then emit one converted document.
    const allChunks = [];
    stream = stream.pipeThrough(
      new TransformStream({
        transform(chunk) {
          allChunks.push(chunk);
        },
        flush(controller) {
          const data = JSON.parse(flattenChunks(allChunks));
          const completion = usesMessages
            ? anthropicMessageResponseToOpenAICompletion(data)
            : anthropicCompletionToOpenAICompletion(data);
          controller.enqueue(new TextEncoder().encode(JSON.stringify(completion)));
          controller.terminate();
        }
      })
    );
    return { stream, response };
  }

  // Streaming: convert each upstream event into an OpenAI chunk.
  let idx = 0;
  stream = stream.pipeThrough(
    createEventStreamTransformer((data) => {
      const parsed = JSON.parse(data);
      const ret = usesMessages
        ? anthroMessagesEventConverterToOpenAI(idx, parsed, model)
        : anthroCompleteEventConverterToOpenAI(idx, parsed);
      idx += 1;
      return {
        data: ret.event && JSON.stringify(ret.event),
        finished: ret.finished
      };
    })
  );
  return { stream, response };
}

// src/providers/cloudflare.ts
/**
 * Wraps generated text in an OpenAI `chat.completion` object.
 *
 * @param {string} text Generated text.
 * @param {string} model Model name echoed back in the result.
 * @returns {object} OpenAI-format completion with a single "stop" choice.
 */
function cloudflareToOpenAICompetion(text, model) {
  const created = Math.floor(Date.now() / 1e3);
  const choice = {
    finish_reason: "stop",
    index: 0,
    message: { role: "assistant", content: text }
  };
  return {
    id: 0,
    choices: [choice],
    created,
    model,
    object: "chat.completion"
  };
}

/**
 * Converts one Cloudflare stream event into an OpenAI `chat.completion.chunk`.
 *
 * @param {number} idx Zero-based event counter, used to build the chunk id.
 * @param {object} event Parsed event; its `response` field is the text delta.
 * @param {string} model Model name echoed back in the chunk.
 * @returns {{event: object, finished: boolean}} `finished` is always false.
 */
function cloudflareMessageEventConverter(idx, event, model) {
  const created = Math.floor(Date.now() / 1e3);
  return {
    event: {
      id: `id-${idx}`,
      choices: [
        {
          delta: { content: event.response },
          // OpenAI chunks carry `null` until generation has actually stopped;
          // an empty string is not a valid finish reason.
          finish_reason: null,
          index: 0
        }
      ],
      created,
      model,
      object: "chat.completion.chunk"
    },
    finished: false
  };
}

/**
 * Sends an OpenAI-style chat request to Cloudflare and converts the response
 * back into OpenAI format.
 *
 * Only `messages`, `max_tokens` and `stream` are forwarded upstream. On a
 * non-OK response the upstream body is returned untouched.
 *
 * @param {object} body OpenAI-style request body.
 * @param {object} headers Plain header object. It is mutated: the
 *   `cf-account-id` entry is removed before the request is sent.
 * @param {string} url Fully-resolved endpoint URL.
 * @returns {Promise<{stream: ReadableStream, response: Response}>}
 */
async function fetchCloudflareRequest(body, headers, url) {
  delete headers["cf-account-id"];

  const newBody = {
    messages: body.messages,
    max_tokens: body.max_tokens,
    stream: body.stream
  };

  const response = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(newBody),
    keepalive: true
  });

  let stream = response.body || new ReadableStream({
    start(controller) {
      controller.close();
    }
  });

  if (!response.ok) {
    return { stream, response };
  }

  if (!newBody.stream) {
    // Non-streaming: buffer the whole body, then emit one converted document.
    const allChunks = [];
    stream = stream.pipeThrough(
      new TransformStream({
        transform(chunk) {
          allChunks.push(chunk);
        },
        flush(controller) {
          const data = JSON.parse(flattenChunks(allChunks));
          const completion = cloudflareToOpenAICompetion(
            data["result"]["response"],
            body.model
          );
          controller.enqueue(new TextEncoder().encode(JSON.stringify(completion)));
          controller.terminate();
        }
      })
    );
    return { stream, response };
  }

  let idx = 0;
  stream = stream.pipeThrough(
    createEventStreamTransformer((data) => {
      const ret = cloudflareMessageEventConverter(idx, JSON.parse(data), body.model);
      idx += 1;
      return {
        data: ret.event && JSON.stringify(ret.event),
        finished: ret.finished
      };
    })
  );
  return { stream, response };
}

// src/providers/cohere.ts
// Maps OpenAI request parameter names to their Cohere equivalents.
var openAItoCohereParam = {
  frequency_penalty: "frequency_penalty",
  temperature: "temperature",
  top_p: "p",
  max_tokens: "max_tokens",
  top_k: "k",
  stop_sequences: "stop_sequences",
  presence_penalty: "presence_penalty",
  seed: "seed"
};

/**
 * Translates OpenAI request parameters into Cohere request parameters.
 *
 * Parameters without an entry in `openAItoCohereParam` are dropped.
 * `temperature` defaults to 0.5 and `max_tokens` to 500 when absent. For any
 * provider other than "cohere", a `p` of exactly 1 is lowered to 0.99.
 *
 * @param {object} openaiParams OpenAI-style parameters.
 * @param {string} [provider] Name of the provider the request is sent to.
 * @returns {object} Cohere-style parameters.
 */
function convertOpenAItoCohereParams(openaiParams, provider) {
  const cohereParams = {};
  for (const [key, value] of Object.entries(openaiParams)) {
    // Own-property check: a plain `in` would also match inherited names such
    // as "toString" or "constructor" and produce garbage keys.
    if (Object.prototype.hasOwnProperty.call(openAItoCohereParam, key)) {
      cohereParams[openAItoCohereParam[key]] = value;
    }
  }
  if (!("temperature" in cohereParams)) {
    cohereParams["temperature"] = 0.5;
  }
  if (!("max_tokens" in cohereParams)) {
    cohereParams["max_tokens"] = 500;
  }
  if (provider !== "cohere" && cohereParams.p === 1) {
    cohereParams.p = 0.99;
  }
  return cohereParams;
}
/**
 * Maps a Cohere finish reason onto the OpenAI `finish_reason` vocabulary.
 *
 * Surrounding whitespace is ignored. "UNSPECIFIED", "COMPLETE", "OTHER" and
 * "STOP_SEQUENCE" map to "stop"; "MAX_TOKENS" maps to "length"; anything else
 * (including a missing value) maps to `null`.
 *
 * @param {string|null|undefined} stop_reason Finish reason reported upstream.
 * @returns {"stop"|"length"|null}
 */
function cohereFinishReason(stop_reason) {
  const reason = typeof stop_reason === "string" ? stop_reason.trim() : stop_reason;
  switch (reason) {
    case "UNSPECIFIED":
    case "COMPLETE":
    case "OTHER":
    case "STOP_SEQUENCE":
      return "stop";
    case "MAX_TOKENS":
      return "length";
    default:
      return null;
  }
}

/**
 * Converts a non-streaming Cohere chat response into an OpenAI
 * `chat.completion` object.
 *
 * @param {object} data Parsed response; `generation_id`, `finish_reason` and
 *   `text` are read from it.
 * @param {string} model Model name echoed back in the result.
 * @param {number} idx Unused; kept for signature compatibility.
 * @returns {object} OpenAI-format completion with a single choice.
 */
function cohereCompletionToOpenAI(data, model, idx) {
  const created = Math.floor(Date.now() / 1e3);
  const choice = {
    finish_reason: cohereFinishReason(data.finish_reason),
    index: 0,
    message: { role: "assistant", content: data.text }
  };
  return {
    id: data.generation_id,
    choices: [choice],
    created,
    model,
    object: "chat.completion"
  };
}

/**
 * Converts one Cohere chat stream event into an OpenAI
 * `chat.completion.chunk`.
 *
 * The event flagged `is_finished` contributes no text of its own: its delta
 * content is the empty string.
 *
 * @param {number} idx Zero-based event counter, used to build the chunk id.
 * @param {object} data Parsed stream event.
 * @param {string} model Model name echoed back in the chunk.
 * @returns {{event: object, finished: boolean}}
 */
function cohereEventConverter(idx, data, model) {
  const content = data.is_finished ? "" : data.text;
  const finishReason = data.finish_reason
    ? cohereFinishReason(data.finish_reason)
    : null;
  const created = Math.floor(Date.now() / 1e3);
  return {
    event: {
      // String id in the same `id-<n>` form the other stream converters use.
      id: `id-${idx}`,
      choices: [
        {
          delta: { content, role: "assistant" },
          finish_reason: finishReason,
          // `index` identifies the choice, not the chunk; there is one choice.
          index: 0
        }
      ],
      created,
      model,
      object: "chat.completion.chunk"
    },
    finished: data.is_finished
  };
}

/**
 * Maps an OpenAI chat role onto a Cohere chat-history role.
 *
 * @param {string} role OpenAI role.
 * @param {string} type Target flavour; for "bedrock" system messages are sent
 *   with the "USER" role.
 * @returns {"CHATBOT"|"USER"|"SYSTEM"} Unknown roles fall back to "USER".
 */
function convertOpenAIRoleToCohere(role, type) {
  if (role === "assistant") {
    return "CHATBOT";
  }
  if (role === "system" && type !== "bedrock") {
    return "SYSTEM";
  }
  return "USER";
}

/**
 * Splits an OpenAI conversation into Cohere's chat history plus the final
 * message.
 *
 * @param {Array<object>} messages OpenAI-format chat messages.
 * @param {string} type Target flavour, forwarded to the role mapping.
 * @returns {{message: ({role: string, message: *}|undefined), cohereMessages: Array<object>}}
 *   `message` is the last converted message (undefined for an empty
 *   conversation); `cohereMessages` holds everything before it.
 */
function convertOAItoCohereMessages(messages, type) {
  const cohereMessages = messages.map((entry) => ({
    role: convertOpenAIRoleToCohere(entry.role, type),
    message: entry.content
  }));
  const message = cohereMessages.pop();
  return { message, cohereMessages };
}

/**
 * Converts a Cohere generate response into an OpenAI `chat.completion`
 * object, using the first generation only.
 *
 * @param {object} data Parsed response with a non-empty `generations` array.
 * @param {string} model Model name echoed back in the result.
 * @returns {object} OpenAI-format completion with a single choice.
 */
function cohereGenerationToOpenAI(data, model) {
  const created = Math.floor(Date.now() / 1e3);
  const generation = data.generations[0];
  return {
    id: data.id,
    choices: [
      {
        finish_reason: cohereFinishReason(generation.finish_reason),
        index: 0,
        message: { role: "assistant", content: generation.text }
      }
    ],
    created,
    model,
    object: "chat.completion"
  };
}
/**
 * Wraps a complete Cohere generate response in a single OpenAI
 * `chat.completion.chunk`, using the first generation only.
 *
 * @param {object} data Parsed response with a non-empty `generations` array.
 * @param {string} model Model name echoed back in the chunk.
 * @param {number} idx Zero-based event counter, used to build the chunk id.
 * @returns {{event: object, finished: boolean}} `finished` is always true,
 *   because the whole generation is delivered in this one chunk.
 */
function cohereGenerationToOpenAIMessageEvent(data, model, idx) {
  const created = Math.floor(Date.now() / 1e3);
  const generation = data.generations[0];
  return {
    event: {
      // String id in the same `id-<n>` form the other stream converters use.
      id: `id-${idx}`,
      choices: [
        {
          delta: { content: generation.text, role: "assistant" },
          finish_reason: cohereFinishReason(generation.finish_reason),
          // `index` identifies the choice, not the chunk; there is one choice.
          index: 0
        }
      ],
      created,
      model,
      object: "chat.completion.chunk"
    },
    finished: true
  };
}
void 0 : message.message, chat_history: cohereMessages, ...cohereParams, stream: body.stream }; const response = await fetch(url, { method: "POST", headers: newHeaders, body: JSON.stringify(cohereRequestPayload) }); let stream = response.body || new ReadableStream({ start(controller) { controller.close(); } }); if (response.ok) { if (!body.stream) { const allChunks = []; stream = stream.pipeThrough( new TransformStream({ transform(chunk, controller) { allChunks.push(chunk); }, async flush(controller) { const text = flattenChunks(allChunks); const data = JSON.parse(text); controller.enqueue( new TextEncoder().encode( JSON.stringify(cohereCompletionToOpenAI(data, model, 0)) ) ); controller.terminate(); } }) ); } else { let idx = 0; let tempChunk = ""; stream = stream.pipeThrough( new TransformStream({ async transform(chunk, controller) { const text = new TextDecoder().decode(chunk); const regex = /{([^}]+)}/g; let match; let extractedText = []; while ((match = regex.exec(text)) !== null) { let jsonStr = match[0]; extractedText.push(jsonStr); } try { while (extractedText.length > 0) { let jsonStr = extractedText.shift(); tempChunk += jsonStr; const data = JSON.parse(tempChunk); const ret = cohereEventConverter(idx, data, model); const eventData = JSON.stringify(ret.event ?