UNPKG

@kdump/code-cli-any-llm

Version:

> A unified gateway for the Gemini, opencode, crush, and Qwen Code AI CLIs

549 lines 26.1 kB
// Compiled (TypeScript -> CommonJS) build artifact of the Gemini-compatible NestJS controller.
// NOTE(review): this is generated, minified output (see the sourceMappingURL trailer); edit the
// TypeScript source, not this file. The first line defines the standard TS emit helpers
// (__decorate/__metadata/__param), registers the CommonJS exports, requires the transformer and
// service modules, and opens the class body with its declared instance fields (injected
// transformers/services plus provider-selection state).
"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; var __param = (this && this.__param) || function (paramIndex, decorator) { return function (target, key) { decorator(target, key, paramIndex); } }; var GeminiController_1; Object.defineProperty(exports, "__esModule", { value: true }); exports.GeminiController = void 0; const common_1 = require("@nestjs/common"); const config_1 = require("@nestjs/config"); const gemini_request_dto_1 = require("../models/gemini/gemini-request.dto"); const request_transformer_1 = require("../transformers/request.transformer"); const response_transformer_1 = require("../transformers/response.transformer"); const stream_transformer_1 = require("../transformers/stream.transformer"); const enhanced_request_transformer_1 = require("../transformers/enhanced-request.transformer"); const enhanced_response_transformer_1 = require("../transformers/enhanced-response.transformer"); const tokenizer_service_1 = require("../services/tokenizer.service"); const llm_provider_resolver_service_1 = require("../services/llm-provider-resolver.service"); let GeminiController = GeminiController_1 = class GeminiController { requestTransformer; responseTransformer; streamTransformer; enhancedRequestTransformer; enhancedResponseTransformer; tokenizerService; configService; 
// Constructor: plain dependency-injection field assignment (NestJS supplies the instances listed
// in the design:paramtypes metadata at the bottom of the file). onApplicationBootstrap triggers
// initializeProvider(), which is idempotent via the `initialized` flag: it asks the
// LlmProviderResolverService for the active provider context (Codex / Claude Code / OpenAI-style),
// caches the resolved provider + config, logs a masked API key (first 20 chars only), and decides
// whether the "enhanced" Zhipu transformers should be used for the configured default model.
// (The next source line ends inside a template literal, so the following code line must not be split.)
providerResolver; logger = new common_1.Logger(GeminiController_1.name); isUsingZhipuModel = false; useCodexProvider = false; useClaudeCodeProvider = false; llmProvider; aiProvider; gatewayApiMode = 'gemini'; initialized = false; activeProviderConfig; constructor(requestTransformer, responseTransformer, streamTransformer, enhancedRequestTransformer, enhancedResponseTransformer, tokenizerService, configService, providerResolver) { this.requestTransformer = requestTransformer; this.responseTransformer = responseTransformer; this.streamTransformer = streamTransformer; this.enhancedRequestTransformer = enhancedRequestTransformer; this.enhancedResponseTransformer = enhancedResponseTransformer; this.tokenizerService = tokenizerService; this.configService = configService; this.providerResolver = providerResolver; } onApplicationBootstrap() { this.initializeProvider(); } initializeProvider() { if (this.initialized) { return; } const context = this.providerResolver.resolve(); this.gatewayApiMode = context.gatewayApiMode; this.aiProvider = context.aiProvider; this.useCodexProvider = context.useCodexProvider; this.useClaudeCodeProvider = context.useClaudeCodeProvider; this.llmProvider = context.provider; this.activeProviderConfig = context.providerConfig; const config = this.getActiveProviderConfig(); const providerName = this.useCodexProvider ? 'Codex' : this.useClaudeCodeProvider ? 'Claude Code' : 'OpenAI'; this.logger.log(`=== ${providerName} Configuration ===`); if (config) { const apiKey = config.apiKey; const baseURL = config.baseURL; const model = config.model; this.logger.log(`API Key: ${apiKey ? apiKey.substring(0, 20) + '...' : 'Not set'}`); this.logger.log(`Base URL: ${baseURL ?? 'Not set'}`); this.logger.log(`Model: ${model ?? 
'Not set'}`); } else { this.logger.log('No provider configuration found.'); } this.logger.log('=========================='); const configuredModel = this.providerResolver.resolveDefaultModel(context); this.isUsingZhipuModel = !this.useCodexProvider && !this.useClaudeCodeProvider && this.enhancedRequestTransformer.isZhipuModel(configuredModel); this.logger.log(`=== Zhipu Optimization ===`); this.logger.log(`Configured Model: ${configuredModel}`); this.logger.log(`Is Zhipu Model: ${this.isUsingZhipuModel}`); this.logger.log(`Using Enhanced Transformers: ${this.isUsingZhipuModel ? 'YES' : 'NO'}`); this.logger.log('==========================='); this.logger.log(`Gateway API Mode: ${this.gatewayApiMode.toUpperCase()}`); this.initialized = true; } getRequestTransformer() { if (this.isUsingZhipuModel) { this.logger.debug(`Using enhanced request transformer for Zhipu model`); return this.enhancedRequestTransformer; } this.logger.debug(`Using standard request transformer`); return this.requestTransformer; } getResponseTransformer() { if (this.isUsingZhipuModel) { this.logger.debug(`Using enhanced response transformer for Zhipu model`); return this.enhancedResponseTransformer; } this.logger.debug(`Using standard response transformer`); return this.responseTransformer; } getActiveProviderConfig() { if (this.activeProviderConfig) { return this.activeProviderConfig; } const key = this.useCodexProvider ? 'codex' : this.useClaudeCodeProvider ? 
// getActiveProviderConfig falls back to ConfigService lookup keyed by provider flavor
// ('codex' | 'claudeCode' | 'openai') and memoizes the result. computePromptTokens sums tokens
// over request.contents plus the systemInstruction (which may be a plain string or a
// parts-bearing object). handleModelRequest is the single POST route handler: it accepts the
// action either as the `alt` query param or as a ":action" suffix on the model path segment
// (generateContent / streamGenerateContent / countTokens), strips the suffix to get the
// requested model name, and maps it to the provider's configured target model (with
// provider-specific hard-coded fallbacks on the next line).
'claudeCode' : 'openai'; const config = this.configService.get(key); this.activeProviderConfig = config; return config; } computePromptTokens(request, model) { let totalTokens = this.tokenizerService.countTokensInRequest(request.contents || [], model); const systemInstruction = request.systemInstruction; if (typeof systemInstruction === 'string') { totalTokens += this.tokenizerService.countTokens(systemInstruction, model); } else if (systemInstruction?.parts) { totalTokens += this.tokenizerService.countTokensInRequest([systemInstruction], model); } return totalTokens; } async handleModelRequest(model, alt, thoughtSignature, request, response) { this.initializeProvider(); try { const isStreamRequest = alt === 'streamGenerateContent' || model.endsWith(':streamGenerateContent'); const isGenerateRequest = alt === 'generateContent' || model.endsWith(':generateContent'); const isCountTokensRequest = alt === 'countTokens' || model.endsWith(':countTokens'); if (!isStreamRequest && !isGenerateRequest && !isCountTokensRequest) { throw new Error('Invalid request. Expected generateContent, streamGenerateContent or countTokens action.'); } const actualModel = model.replace(/:(generateContent|streamGenerateContent|countTokens)$/, ''); const config = this.getActiveProviderConfig(); const targetModel = config?.model || (this.useCodexProvider ? 'gpt-5-codex' : this.useClaudeCodeProvider ? 
// Claude Code requests short-circuit to dedicated handlers (handleClaudeStream for streams,
// generateFromGemini for unary). The generic stream path writes SSE headers (with
// X-Accel-Buffering: no to defeat proxy buffering), flushes them, transforms the Gemini request
// to the OpenAI shape with stream=true, pre-computes the prompt token count for usage metadata,
// and then launches a deliberately un-awaited async IIFE (`void (async () => ...)`) that pumps
// provider chunks to the client while the handler itself returns.
'claude-sonnet-4-5-20250929' : 'glm-4.5'); this.logger.debug(`Mapping model ${actualModel} to ${targetModel}`); if (this.useClaudeCodeProvider) { if (isStreamRequest) { await this.handleClaudeStream(request, response, targetModel, thoughtSignature); return; } if (isGenerateRequest) { this.logger.debug(`Received generateContent request (Claude Code) for model: ${actualModel}`); const claudeProvider = this.llmProvider; const claudeResponse = await claudeProvider.generateFromGemini(request, targetModel); if (thoughtSignature) { claudeResponse.thoughtSignature = thoughtSignature; } response.status(200).json(claudeResponse); return; } } if (isStreamRequest) { this.logger.debug(`Received streamGenerateContent request for model: ${actualModel}`); response.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', Connection: 'keep-alive', 'X-Accel-Buffering': 'no', 'Transfer-Encoding': 'chunked', }); if (typeof response.flushHeaders === 'function') { response.flushHeaders(); } const requestTransformer = this.getRequestTransformer(); const openAIRequest = requestTransformer.transformRequest(request, targetModel); openAIRequest.stream = true; const promptTokenCount = this.computePromptTokens(request, targetModel); this.streamTransformer.initializeForModel(targetModel, promptTokenCount); response.on('close', () => { this.logger.debug('Client disconnected'); }); response.on('error', (err) => { this.logger.error('Response error:', err); }); const provider = this.llmProvider; void (async () => { try { for await (const chunk of provider.generateContentStream(openAIRequest)) { if (response.destroyed || response.closed) { this.logger.debug('Response closed, stopping stream'); break; } const geminiChunk = this.streamTransformer.transformStreamChunk(chunk); const chunkList = this.splitThoughtAndContentChunks(geminiChunk); let shouldStopProcessing = false; for (const chunkToSend of chunkList) { this.logStreamChunk('stream', chunkToSend); if (thoughtSignature) 
// Inner pump loop: each provider chunk is split into thought/content chunks, stamped with the
// caller's thought_signature, serialized to SSE, and written with backpressure handling — on a
// full write buffer it races the 'drain' event against a 5s setTimeout (NOTE(review): the timer
// is never cleared, and resolve being called twice is harmless). After the provider stream ends,
// any text still buffered in the stream transformer is flushed as a final STOP chunk with usage
// metadata, then the SSE end marker is written and the response ended; stream errors are
// reported to the client as a data: {error: {code: 'STREAM_ERROR'}} event when still writable.
// (The next source line ends inside a template literal, so the following code line must not be split.)
{ chunkToSend.thoughtSignature = thoughtSignature; } const sseData = this.streamTransformer.toSSEFormat(chunkToSend); if (sseData && sseData.trim() && !response.destroyed && !response.closed) { const writeSuccess = response.write(sseData); if (!writeSuccess) { this.logger.warn('Write buffer full, waiting for drain'); await new Promise((resolve) => { response.once('drain', resolve); setTimeout(resolve, 5000); }); } } else if (!sseData || !sseData.trim()) { continue; } else { shouldStopProcessing = true; break; } } if (shouldStopProcessing) { break; } } const bufferedText = this.streamTransformer.getBufferedText(); if (bufferedText && !response.destroyed && !response.closed) { const finalChunk = { candidates: [ { content: { role: 'model', parts: [{ text: bufferedText }], }, index: 0, finishReason: 'STOP', }, ], }; if (thoughtSignature) { finalChunk.thoughtSignature = thoughtSignature; } this.streamTransformer.applyUsageMetadata(finalChunk); const finalChunks = this.splitThoughtAndContentChunks(finalChunk); for (const chunkToSend of finalChunks) { this.logStreamChunk('stream-final', chunkToSend); if (thoughtSignature) { chunkToSend.thoughtSignature = thoughtSignature; } const finalSSEData = this.streamTransformer.toSSEFormat(chunkToSend); if (finalSSEData && finalSSEData.trim()) { response.write(finalSSEData); } } } if (!response.destroyed && !response.closed) { try { const endMarker = this.streamTransformer.createSSEEndMarker(); if (endMarker) { response.write(endMarker); } response.end(); } catch (endError) { this.logger.error('Error ending response:', endError); try { response.end(); } catch (e) { this.logger.error('Failed to end response:', e); } } } } catch (error) { this.logger.error('Stream processing error:', error); if (!response.destroyed && !response.closed) { response.write(`data: ${JSON.stringify({ error: { code: 'STREAM_ERROR', message: error.message, }, })}\n\n`); response.end(); } } })(); } else if (isCountTokensRequest) { this.logger.debug(`Received 
countTokens request for model: ${actualModel}`); const tokenCount = this.computePromptTokens(request, targetModel); response.status(200).json({ totalTokens: tokenCount, }); } else { this.logger.debug(`Received generateContent request for model: ${actualModel}`); this.logger.debug(`Request path: /api/v1/models/${model}`); const requestTransformer = this.getRequestTransformer(); const openAIRequest = requestTransformer.transformRequest(request, targetModel); const openAIResponse = await this.llmProvider.generateContent(openAIRequest); const responseTransformer = this.getResponseTransformer(); const geminiResponse = responseTransformer.transformResponse(openAIResponse); if (thoughtSignature) { geminiResponse.thoughtSignature = thoughtSignature; } this.logger.debug('=== Transformed Gemini Response ==='); this.logger.debug(`Response: ${JSON.stringify(geminiResponse, null, 2)}`); this.logger.debug('====================================='); this.logger.debug('=== About to return response to client ==='); this.logger.debug('====================================='); response.status(200).json(geminiResponse); return; } } catch (error) { this.logger.error(`Error in model request for ${model}:`, error); throw error; } } splitThoughtAndContentChunks(chunk) { const candidates = chunk?.candidates; if (!Array.isArray(candidates) || candidates.length === 0) { return [chunk]; } const [candidate] = candidates; const contentInfo = candidate?.content; const parts = contentInfo?.parts; if (!contentInfo || !Array.isArray(parts) || parts.length === 0) { return [chunk]; } const thoughtParts = parts .filter((part) => part.thought) .map((part) => ({ ...part })); const contentParts = parts .filter((part) => !part.thought) .map((part) => ({ ...part })); if (thoughtParts.length === 0 || contentParts.length === 0) { return [chunk]; } const baseContent = { ...contentInfo }; const normalizedThoughtParts = this.normalizeParts(thoughtParts); const normalizedContentParts = 
// splitThoughtAndContentChunks: when a chunk's first candidate mixes `thought` parts with
// regular content parts, it is split into two chunks — a thought chunk (finishReason cleared)
// followed by a content chunk (finishReason preserved); content parts get a leading newline
// prepended via normalizeParts. handleClaudeStream mirrors the generic SSE stream path for the
// Claude Code provider: same headers, same split/signature/write-with-drain-timeout loop over
// provider.streamFromGemini, plus a post-drain disconnect check before continuing.
this.normalizeParts(contentParts, { prependLeadingNewline: true, }); const createChunk = (selectedParts, options) => ({ ...chunk, candidates: [ { ...candidate, content: { ...baseContent, parts: selectedParts }, finishReason: options?.isThoughtChunk ? undefined : candidate.finishReason, }, ], }); const thoughtChunk = createChunk(normalizedThoughtParts, { isThoughtChunk: true, }); const contentChunk = createChunk(normalizedContentParts); return [thoughtChunk, contentChunk]; } async handleClaudeStream(request, response, targetModel, thoughtSignature) { this.logger.debug(`Received streamGenerateContent request (Claude Code) for model: ${targetModel}`); response.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', Connection: 'keep-alive', 'X-Accel-Buffering': 'no', 'Transfer-Encoding': 'chunked', }); if (typeof response.flushHeaders === 'function') { response.flushHeaders(); } this.streamTransformer.reset(); response.on('close', () => { this.logger.debug('Client disconnected (Claude stream)'); }); response.on('error', (err) => { this.logger.error('Response error (Claude stream):', err); }); const provider = this.llmProvider; try { for await (const chunk of provider.streamFromGemini(request, targetModel)) { if (response.destroyed || response.closed) { this.logger.debug('Response closed, stopping Claude stream'); break; } const chunkList = this.splitThoughtAndContentChunks(chunk); let shouldStop = false; for (const chunkToSend of chunkList) { this.logStreamChunk('stream', chunkToSend); if (thoughtSignature) { chunkToSend.thoughtSignature = thoughtSignature; } const sseData = this.streamTransformer.toSSEFormat(chunkToSend); if (!sseData || !sseData.trim()) { continue; } const writeSuccess = response.write(sseData); if (!writeSuccess) { this.logger.warn('Claude stream backpressure detected, waiting for drain'); await new Promise((resolve) => { response.once('drain', resolve); setTimeout(resolve, 5000); }); if (response.destroyed || 
// Tail of handleClaudeStream: end-marker + end() on clean completion, STREAM_ERROR SSE event on
// failure. normalizeParts copies text parts, optionally prepends a leading '\n' to the first
// text part, and drops consecutive duplicate text values (lastText dedupe resets on non-text
// parts). logStreamChunk emits a compact debug preview per chunk (thought/content/function-call
// tags, text truncated to 120 chars); sanitizeForLog truncates any value to maxLength with an
// ellipsis, JSON-stringifying non-strings and falling back to String() on serialization failure.
response.closed) { shouldStop = true; break; } } } if (shouldStop) { break; } } if (!response.destroyed && !response.closed) { const endMarker = this.streamTransformer.createSSEEndMarker(); if (endMarker && endMarker.trim()) { response.write(endMarker); } response.end(); } } catch (error) { this.logger.error('Claude stream processing error:', error); if (!response.destroyed && !response.closed) { response.write(`data: ${JSON.stringify({ error: { code: 'STREAM_ERROR', message: error.message, }, })}\n\n`); response.end(); } } } normalizeParts(parts, options) { const normalized = []; let lastText; for (const part of parts) { const partText = typeof part.text === 'string' ? part.text : undefined; if (partText !== undefined) { let text = partText; if (options?.prependLeadingNewline && normalized.length === 0) { text = text.startsWith('\n') ? text : `\n${text}`; } if (text === lastText) { continue; } lastText = text; const cloned = { ...part }; cloned.text = text; normalized.push(cloned); } else { normalized.push({ ...part }); lastText = undefined; } } return normalized; } logStreamChunk(label, chunk) { const candidate = chunk?.candidates?.[0]; if (!candidate) { this.logger.debug(`[Stream][${label}] empty candidate`); return; } const parts = candidate.content?.parts; let preview; if (Array.isArray(parts) && parts.length > 0) { preview = parts .map((part) => { const isThought = Boolean(part.thought); const text = typeof part.text === 'string' ? part.text : undefined; if (text) { return `${isThought ? '[thought]' : '[content]'}${this.sanitizeForLog(text, 120)}`; } if (part.functionCall) { return '[function-call]'; } return '[non-text]'; }) .join(' | '); } this.logger.debug(`[Stream][${label}] id=${chunk.responseId ?? 'unknown'} preview=${preview ?? '∅'} finish=${candidate.finishReason ?? '∅'}`); } sanitizeForLog(value, maxLength = 500) { if (typeof value === 'string') { return value.length > maxLength ? 
// Decorator wiring (standard TS emit): handleModelRequest is bound to POST gemini/models/:model
// with @Param('model'), @Query('alt'), @Query('thought_signature'), @Body() (validated as
// GeminiRequestDto), and @Res() — using @Res() means the handler manages the response object
// itself, which is why every path calls response.status(...).json(...) or writes SSE manually.
// The class decorator registers @Controller('gemini') and the constructor paramtypes for DI.
`${value.slice(0, maxLength)}…` : value; } try { const serialized = JSON.stringify(value); return serialized.length > maxLength ? `${serialized.slice(0, maxLength)}…` : serialized; } catch { const fallback = String(value); return fallback.length > maxLength ? `${fallback.slice(0, maxLength)}…` : fallback; } } }; exports.GeminiController = GeminiController; __decorate([ (0, common_1.Post)('models/:model'), __param(0, (0, common_1.Param)('model')), __param(1, (0, common_1.Query)('alt')), __param(2, (0, common_1.Query)('thought_signature')), __param(3, (0, common_1.Body)()), __param(4, (0, common_1.Res)()), __metadata("design:type", Function), __metadata("design:paramtypes", [String, String, String, gemini_request_dto_1.GeminiRequestDto, Object]), __metadata("design:returntype", Promise) ], GeminiController.prototype, "handleModelRequest", null); exports.GeminiController = GeminiController = GeminiController_1 = __decorate([ (0, common_1.Controller)('gemini'), __metadata("design:paramtypes", [request_transformer_1.RequestTransformer, response_transformer_1.ResponseTransformer, stream_transformer_1.StreamTransformer, enhanced_request_transformer_1.EnhancedRequestTransformer, enhanced_response_transformer_1.EnhancedResponseTransformer, tokenizer_service_1.TokenizerService, config_1.ConfigService, llm_provider_resolver_service_1.LlmProviderResolverService]) ], GeminiController); //# sourceMappingURL=gemini.controller.js.map