UNPKG

multi-lane-manager

Version:

Nacos 泳道管理与请求路由组件

605 lines (514 loc) 21.8 kB
import axios, { type Method as AxiosMethod, type AxiosRequestConfig, AxiosError } from 'axios';
import type { H3Event } from 'h3';
import { TLSSocket } from 'tls';
import type { ServiceInstanceInfo } from '../types';
import { getConfig } from './config';
import {
  DEFAULT_LANE_TARGET_HEADER,
  HEADER_PROXIED_BY,
  HEADER_ORIGINAL_LANE,
  HEADER_PROXIED_BY_VALUE,
  HEADER_LANE_DEBUG,
  HEADER_LANE_DETAIL,
  getSafeHeaderValue
} from './defaults';
import { logger } from './logger';
import { getNacosLaneInstances } from './nacos';

/**
 * Load-balancing strategies understood by {@link selectInstance}.
 */
export enum LoadBalanceStrategy {
  RANDOM = 'random', // pick a random instance
  ROUND_ROBIN = 'round-robin', // rotate through the instances
  LEAST_CONNECTIONS = 'least-connections', // pick the instance with the fewest tracked connections
}

// Next round-robin position, keyed by service name.
const serviceIndices: Record<string, number> = {};

// In-flight proxied requests per instance key, used by the least-connections strategy.
const instanceConnections: Record<string, number> = {};

// Failure bookkeeping per instance key, used for blacklisting.
const failedInstances: Record<string, { count: number; lastFailTime: number; blacklistUntil: number }> = {};

// Failover tuning knobs.
const FAILURE_CONFIG = {
  MAX_RETRY_ATTEMPTS: 2, // maximum number of backup instances tried after the primary
  FAILURE_THRESHOLD: 3, // recorded failures before an instance is blacklisted
  BLACKLIST_DURATION: 30000, // blacklist duration in ms (30 s)
  HEALTH_CHECK_INTERVAL: 60000, // health-check interval in ms (60 s); not referenced in this module
  CONNECTION_TIMEOUT: 5000, // connection timeout in ms (5 s); not referenced in this module
  RETRY_DELAY: 1000, // pause before each failover attempt in ms (1 s)
};

// HTTP methods for which the incoming request stream is forwarded as the request body.
const BODY_METHODS = ['POST', 'PUT', 'PATCH', 'DELETE'];

/**
 * Builds the key under which an instance is tracked in the bookkeeping maps.
 */
function instanceKeyOf(instance: ServiceInstanceInfo): string {
  return `${instance.serviceName}:${instance.ip}:${instance.port}`;
}

/**
 * Tells whether the incoming request uses a method whose body is forwarded upstream.
 */
function requestCarriesBody(event: H3Event): boolean {
  const method = event.node.req.method;
  return Boolean(method && BODY_METHODS.includes(method.toUpperCase()));
}

/**
 * Picks one instance out of `instances` according to `strategy`.
 *
 * @param instances candidate instances
 * @param serviceName service name; keys the round-robin cursor
 * @param strategy load-balancing strategy (defaults to round-robin)
 * @returns the selected instance
 * @throws Error when `instances` is empty
 */
export function selectInstance(
  instances: ServiceInstanceInfo[],
  serviceName: string,
  strategy: LoadBalanceStrategy = LoadBalanceStrategy.ROUND_ROBIN
): ServiceInstanceInfo {
  if (instances.length === 0) {
    throw new Error('🚫 没有可用的服务实例');
  }

  // Nothing to balance with a single candidate.
  if (instances.length === 1) {
    return instances[0];
  }

  switch (strategy) {
    case LoadBalanceStrategy.RANDOM:
      return instances[Math.floor(Math.random() * instances.length)];

    case LoadBalanceStrategy.ROUND_ROBIN: {
      if (!serviceIndices[serviceName]) {
        serviceIndices[serviceName] = 0;
      }
      // The modulo also protects against a cursor left over from a longer instance list.
      const index = serviceIndices[serviceName] % instances.length;
      serviceIndices[serviceName] = (serviceIndices[serviceName] + 1) % instances.length;
      return instances[index];
    }

    case LoadBalanceStrategy.LEAST_CONNECTIONS: {
      let minConnections = Number.MAX_SAFE_INTEGER;
      let selectedInstance = instances[0];
      for (const instance of instances) {
        const connections = instanceConnections[instanceKeyOf(instance)] || 0;
        // Strict comparison: on ties the earliest instance wins.
        if (connections < minConnections) {
          minConnections = connections;
          selectedInstance = instance;
        }
      }
      return selectedInstance;
    }

    default:
      // Unknown strategy: fall back to the first instance.
      return instances[0];
  }
}

/**
 * Adjusts the tracked in-flight request count of an instance.
 *
 * @param instance service instance
 * @param increment `true` to add one connection, `false` to remove one (never below zero)
 */
function updateInstanceConnections(instance: ServiceInstanceInfo, increment: boolean): void {
  const instanceKey = instanceKeyOf(instance);

  if (!instanceConnections[instanceKey]) {
    instanceConnections[instanceKey] = 0;
  }

  if (increment) {
    instanceConnections[instanceKey]++;
  } else {
    instanceConnections[instanceKey] = Math.max(0, instanceConnections[instanceKey] - 1);
  }
}

/**
 * Tells whether an instance is currently blacklisted.
 *
 * @param instance service instance
 * @returns `true` while the instance's blacklist deadline lies in the future
 */
function isInstanceBlacklisted(instance: ServiceInstanceInfo): boolean {
  const failureInfo = failedInstances[instanceKeyOf(instance)];
  if (!failureInfo) {
    return false;
  }
  return Date.now() < failureInfo.blacklistUntil;
}

/**
 * Records one failure for an instance and blacklists it once the threshold is reached.
 *
 * @param instance service instance
 * @param error the failure that occurred (only its message is logged)
 */
function recordInstanceFailure(instance: ServiceInstanceInfo, error: Error): void {
  const instanceKey = instanceKeyOf(instance);
  const now = Date.now();

  if (!failedInstances[instanceKey]) {
    failedInstances[instanceKey] = { count: 0, lastFailTime: 0, blacklistUntil: 0 };
  }

  const failureInfo = failedInstances[instanceKey];
  failureInfo.count++;
  failureInfo.lastFailTime = now;

  if (failureInfo.count >= FAILURE_CONFIG.FAILURE_THRESHOLD) {
    failureInfo.blacklistUntil = now + FAILURE_CONFIG.BLACKLIST_DURATION;
    logger.warn(`⚠️ 实例 ${instanceKey} 故障次数达到阈值 ${FAILURE_CONFIG.FAILURE_THRESHOLD},加入黑名单 ${FAILURE_CONFIG.BLACKLIST_DURATION/1000} 秒`);
    logger.warn(`⚠️ 故障原因: ${error.message}`);
  } else {
    logger.warn(`⚠️ 实例 ${instanceKey} 故障 (${failureInfo.count}/${FAILURE_CONFIG.FAILURE_THRESHOLD}): ${error.message}`);
  }
}

/**
 * Clears the failure record of an instance; called after a successful request.
 *
 * @param instance service instance
 */
function resetInstanceFailure(instance: ServiceInstanceInfo): void {
  const instanceKey = instanceKeyOf(instance);

  if (failedInstances[instanceKey]) {
    // Must be evaluated before the record is deleted.
    const wasBlacklisted = isInstanceBlacklisted(instance);
    delete failedInstances[instanceKey];

    if (wasBlacklisted) {
      logger.info(`✅ 实例 ${instanceKey} 已恢复,从黑名单中移除`);
    }
  }
}

/**
 * Decides whether an error justifies trying another instance.
 *
 * Only axios errors qualify: connection-level failures (`ECONNREFUSED`,
 * `ENOTFOUND`, `ETIMEDOUT`), responses with status 502–504, and aborted /
 * timed-out requests (`ECONNABORTED`).
 *
 * @param error the error raised by the proxy attempt
 * @returns `true` when failover should be attempted
 */
function shouldFailover(error: Error): boolean {
  if (error instanceof AxiosError) {
    if (error.code === 'ECONNREFUSED' || error.code === 'ENOTFOUND' || error.code === 'ETIMEDOUT') {
      return true;
    }

    if (error.response) {
      const status = error.response.status;
      return status >= 502 && status <= 504;
    }

    if (error.code === 'ECONNABORTED') {
      return true;
    }
  }

  return false;
}

/**
 * Returns the instances that are not currently blacklisted.
 *
 * @param instances candidate instances
 * @returns the non-blacklisted subset, in the original order
 */
function filterHealthyInstances(instances: ServiceInstanceInfo[]): ServiceInstanceInfo[] {
  return instances.filter(instance => !isInstanceBlacklisted(instance));
}

/**
 * Destroys the unread response stream attached to an axios error, if there is one.
 *
 * When `validateStatus` rejects a streamed response, the upstream body is
 * never consumed; destroying it releases the underlying socket.
 */
function discardUnreadBody(error: unknown): void {
  if (!(error instanceof AxiosError)) {
    return;
  }
  const body: unknown = error.response?.data;
  if (typeof body === 'object' && body !== null && typeof (body as { destroy?: unknown }).destroy === 'function') {
    (body as { destroy: () => void }).destroy();
  }
}

/**
 * Forwards the current request to one instance and streams the answer back.
 *
 * This is the shared core of {@link proxyRequestToInstance} and
 * {@link proxyRequestWithFailover}. It never writes an error response itself:
 * every failure is logged and re-thrown so the caller decides what to do.
 * The instance's connection count is incremented on entry and decremented
 * exactly once on exit.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance instance to forward to
 * @param isDebugMode whether to attach the debug detail header
 * @param rejectGatewayErrors when `true`, upstream 502–504 answers are raised
 *   as `AxiosError` instead of being relayed, so the caller can fail over
 * @throws the axios or stream error that made the attempt fail
 */
async function forwardToInstance(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean,
  rejectGatewayErrors: boolean
): Promise<void> {
  const config = getConfig();
  const requestPath = event.node.req.url || '';
  const targetUrl = `http://${targetInstance.ip}:${targetInstance.port}${requestPath}`;

  logger.info(`🔄 代理请求到目标实例: ${targetInstance.serviceName}@${targetInstance.ip}:${targetInstance.port} (泳道: ${targetInstance.metadata.laneId}), URL: ${targetUrl}`);

  updateInstanceConnections(targetInstance, true);

  try {
    // Copy the incoming headers, leaving out `host` and the lane-target header.
    const newHeaders: Record<string, string | string[] | undefined> = {};
    for (const [key, value] of Object.entries(event.node.req.headers)) {
      if (key !== 'host' && key !== config.targetLaneHeaderKey) {
        newHeaders[key] = value;
      }
    }

    // Forwarding headers.
    const remoteAddress = event.node.req.socket?.remoteAddress;
    newHeaders['X-Forwarded-For'] = remoteAddress || event.node.req.headers['x-forwarded-for'] || '';
    newHeaders['X-Forwarded-Host'] = event.node.req.headers['host'] || '';

    const proto = event.node.req.headers['x-forwarded-proto'] ||
      (event.node.req.socket instanceof TLSSocket && event.node.req.socket.encrypted ? 'https' : 'http');
    newHeaders['X-Forwarded-Proto'] = proto;

    const proxyRequestConfig: AxiosRequestConfig = {
      method: event.node.req.method as AxiosMethod,
      url: targetUrl,
      headers: newHeaders,
      responseType: 'stream',
      // Relay every status unless the caller wants gateway errors raised for failover.
      validateStatus: rejectGatewayErrors
        ? (status: number) => status < 502 || status > 504
        : () => true,
      // Relay the upstream body untouched: with decompression enabled axios strips
      // `content-encoding` but leaves `content-length`, which no longer matches the body.
      decompress: false,
      timeout: config.proxyTimeout,
    };

    // Forward the incoming request stream as the body for body-carrying methods.
    if (requestCarriesBody(event)) {
      proxyRequestConfig.data = event.node.req;
    }

    logger.debug('📋 代理请求配置:', {
      ...proxyRequestConfig,
      data: proxyRequestConfig.data ? '<REQUEST_STREAM>' : undefined,
    });

    const proxyRes = await axios.request(proxyRequestConfig);

    logger.info(`📥 代理响应状态: ${proxyRes.status}`);

    await new Promise<void>((resolve, reject) => {
      // Several events can end the relay (end, error, client close); act on the first only.
      let settled = false;
      const settle = (failure?: Error): void => {
        if (settled) {
          return;
        }
        settled = true;
        if (failure) {
          reject(failure);
        } else {
          resolve();
        }
      };

      try {
        // 1. Status code.
        event.node.res.statusCode = proxyRes.status;

        // 2. Response headers: upstream headers first, then the lane manager's own.
        const headersToSet = new Map<string, string | string[] | number>();

        Object.entries(proxyRes.headers).forEach(([key, value]) => {
          if (value !== undefined && value !== null) {
            headersToSet.set(key, value as string | string[] | number);
          }
        });

        headersToSet.set(HEADER_PROXIED_BY, HEADER_PROXIED_BY_VALUE);
        headersToSet.set(HEADER_ORIGINAL_LANE, config.currentLaneId);
        headersToSet.set(DEFAULT_LANE_TARGET_HEADER, targetInstance.metadata.laneId);

        if (isDebugMode) {
          const debugInfo: string[] = [];
          debugInfo.push(`代理时间: ${new Date().toISOString()}`);
          debugInfo.push(`代理目标: ${targetInstance.ip}:${targetInstance.port}`);
          debugInfo.push(`目标泳道: ${targetInstance.metadata.laneId}`);
          debugInfo.push(`响应状态: ${proxyRes.status}`);
          debugInfo.push(`响应大小: ${proxyRes.headers['content-length'] || '未知'}`);

          try {
            headersToSet.set(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
          } catch (error) {
            logger.error(`❌ 设置调试响应头时出错: ${error instanceof Error ? error.message : String(error)}`);
          }
        }

        // No lane-id cookie is set here; only cookies present in the upstream response are kept.

        if (event.node.res.headersSent) {
          logger.warn(`⚠️ 响应头已发送,无法设置新的响应头`);
          console.log(`[multi-lane-manager] ⚠️ 响应头已发送,无法设置新的响应头`);
        }

        for (const [key, value] of headersToSet.entries()) {
          try {
            if (!event.node.res.headersSent) {
              event.node.res.setHeader(key, value);
              logger.debug(`✅ 成功设置响应头: ${key}=${value}`);
            } else {
              logger.warn(`⚠️ 无法设置响应头 ${key},响应头已发送`);
              console.log(`[multi-lane-manager] ⚠️ 无法设置响应头 ${key},响应头已发送`);
            }
          } catch (headerError) {
            logger.warn(`⚠️ 设置响应头 ${key} 失败: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
            console.log(`[multi-lane-manager] ⚠️ 设置响应头 ${key} 失败: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
          }
        }

        // 3. Body stream.
        proxyRes.data.on('error', (err: Error) => {
          logger.error(`❌ 代理响应数据流错误: ${err.message}`);
          settle(err);
        });

        proxyRes.data.on('end', () => {
          logger.debug('✅ 代理响应数据流结束');
          settle();
        });

        // If the client goes away mid-transfer the upstream stream is unpiped and
        // would never emit `end`; stop reading from upstream and finish the relay.
        event.node.res.once('close', () => {
          if (settled) {
            return;
          }
          logger.warn(`⚠️ 客户端连接已关闭,终止代理响应数据流`);
          settle();
          proxyRes.data.destroy();
        });

        // Mark the request as handled so it is not processed a second time.
        event.context._laneManagerHandled = true;

        // Start streaming only after all headers have been set.
        proxyRes.data.pipe(event.node.res);
      } catch (err) {
        settle(err instanceof Error ? err : new Error(String(err)));
      }
    });
  } catch (error) {
    logger.error(`❌ 代理请求到 ${targetUrl} 失败: ${error instanceof Error ? error.message : String(error)}`);
    discardUnreadBody(error);
    throw error;
  } finally {
    // Single release point: the counter goes down exactly once per attempt.
    updateInstanceConnections(targetInstance, false);
  }
}

/**
 * Proxies the request with automatic failover to other healthy instances.
 *
 * The primary target is tried first, followed by up to
 * `FAILURE_CONFIG.MAX_RETRY_ATTEMPTS` non-blacklisted backups. A failed
 * attempt is recorded against the instance; failover continues only when
 * {@link shouldFailover} accepts the error, nothing has been sent to the
 * client yet, and the request has no body (a streamed body is consumed by the
 * first attempt and cannot be replayed). When every attempt fails a 503 is
 * returned through {@link handleFinalFailure}.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance preferred target instance
 * @param isDebugMode whether to attach debug headers
 * @param allInstances all known instances of the service (source of backups)
 * @returns `true` when a response was relayed, `false` when an error response was produced
 */
export async function proxyRequestWithFailover(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false,
  allInstances: ServiceInstanceInfo[] = []
): Promise<boolean> {
  let lastError: Error | null = null;

  // Primary target first, then healthy backups.
  const instancesToTry = [targetInstance];

  if (allInstances.length > 1) {
    const healthyBackups = filterHealthyInstances(
      allInstances.filter(inst =>
        inst.ip !== targetInstance.ip || inst.port !== targetInstance.port
      )
    );
    instancesToTry.push(...healthyBackups.slice(0, FAILURE_CONFIG.MAX_RETRY_ATTEMPTS));
  }

  // The incoming body stream can be forwarded only once.
  const canReplay = !requestCarriesBody(event);

  logger.info(`🔄 开始代理请求,可尝试实例数: ${instancesToTry.length}`);

  for (let attempt = 0; attempt < instancesToTry.length; attempt++) {
    const currentInstance = instancesToTry[attempt];
    const isRetry = attempt > 0;
    const isLastAttempt = attempt === instancesToTry.length - 1;

    if (isInstanceBlacklisted(currentInstance)) {
      logger.warn(`⚠️ 跳过黑名单实例: ${currentInstance.ip}:${currentInstance.port}`);
      continue;
    }

    try {
      if (isRetry) {
        logger.info(`🔄 故障转移到实例: ${currentInstance.ip}:${currentInstance.port} (尝试 ${attempt + 1}/${instancesToTry.length})`);
        // Pause before retrying.
        await new Promise(resolve => setTimeout(resolve, FAILURE_CONFIG.RETRY_DELAY));
      }

      // Raise gateway errors only while another attempt is still possible; on the
      // last attempt the upstream answer is relayed to the client as-is.
      await forwardToInstance(event, currentInstance, isDebugMode, canReplay && !isLastAttempt);

      resetInstanceFailure(currentInstance);

      if (isRetry) {
        logger.info(`✅ 故障转移成功,使用实例: ${currentInstance.ip}:${currentInstance.port}`);
      }

      return true;
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));

      recordInstanceFailure(currentInstance, lastError);

      // Part of a response already reached the client: another attempt would corrupt it.
      if (event.node.res.headersSent) {
        break;
      }

      if (!shouldFailover(lastError)) {
        logger.warn(`⚠️ 错误类型不适合故障转移: ${lastError.message}`);
        break;
      }

      logger.warn(`⚠️ 实例 ${currentInstance.ip}:${currentInstance.port} 请求失败: ${lastError.message}`);

      if (!canReplay) {
        logger.warn(`⚠️ 请求体无法重放,跳过故障转移`);
        break;
      }

      if (isLastAttempt) {
        logger.error(`❌ 所有实例都已尝试,请求最终失败`);
        break;
      }
    }
  }

  return handleFinalFailure(event, lastError, isDebugMode);
}

/**
 * Answers the client with 503 after every failover attempt has failed.
 *
 * Does nothing but mark the request as handled when the response headers
 * have already been sent.
 *
 * @param event H3 event of the incoming request
 * @param error last error seen, or `null` when no attempt was made
 * @param isDebugMode whether to attach the debug detail header
 * @returns always `false`
 */
async function handleFinalFailure(event: H3Event, error: Error | null, isDebugMode: boolean): Promise<boolean> {
  if (event.node.res.headersSent) {
    logger.warn(`⚠️ 响应头已发送,无法返回错误响应`);
    event.context._laneManagerHandled = true;
    return false;
  }

  try {
    if (isDebugMode) {
      const debugInfo: string[] = [];
      debugInfo.push(`故障转移时间: ${new Date().toISOString()}`);
      debugInfo.push(`最终错误: ${error?.message || '所有实例都不可用'}`);
      debugInfo.push(`状态: 所有实例故障转移失败`);

      try {
        event.node.res.setHeader(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
      } catch (headerError) {
        logger.error(`❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
      }
    }

    event.node.res.statusCode = 503; // Service Unavailable
    event.node.res.setHeader('Content-Type', 'text/plain');

    const errorMessage = `服务暂时不可用,所有实例都无法访问。请稍后重试。`;
    event.node.res.end(errorMessage);

    logger.error(`❌ 发送服务不可用响应`);
  } catch (responseError) {
    logger.error(`❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
  }

  event.context._laneManagerHandled = true;
  return false;
}

/**
 * Forwards the request to a single instance, without failover.
 *
 * On failure a 502 response describing the error is written (unless headers
 * were already sent) and the request is marked as handled.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance instance to forward to
 * @param isDebugMode whether to attach debug headers
 * @param isRetry whether this call is a retry; accepted for compatibility, currently unused
 * @returns `true` when the upstream response was relayed, `false` otherwise
 */
async function proxyRequestToInstance(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false,
  isRetry: boolean = false
): Promise<boolean> {
  try {
    await forwardToInstance(event, targetInstance, isDebugMode, false);
    return true;
  } catch (error) {
    // The failure itself has already been logged by forwardToInstance.
    if (event.node.res.headersSent) {
      logger.warn(`⚠️ 响应头已发送,无法返回错误响应`);
      console.log(`[multi-lane-manager] ⚠️ 响应头已发送,无法返回错误响应`);
      event.context._laneManagerHandled = true;
      return false;
    }

    try {
      if (isDebugMode) {
        const debugInfo: string[] = [];
        debugInfo.push(`代理时间: ${new Date().toISOString()}`);
        debugInfo.push(`代理目标: ${targetInstance.ip}:${targetInstance.port}`);
        debugInfo.push(`目标泳道: ${targetInstance.metadata.laneId}`);
        debugInfo.push(`错误: ${error instanceof Error ? error.message : '未知代理错误'}`);

        try {
          event.node.res.setHeader(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
          logger.debug(`✅ 成功设置错误响应头: ${HEADER_LANE_DETAIL}`);
        } catch (headerError) {
          logger.error(`❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
          console.log(`[multi-lane-manager] ❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
        }
      }

      event.node.res.statusCode = 502; // Bad Gateway
      event.node.res.setHeader('Content-Type', 'text/plain');

      const errorMessage = `代理请求到泳道 ${targetInstance.metadata.laneId} 时出错。原因: ${error instanceof Error ? error.message : '未知代理错误'}`;
      event.node.res.end(errorMessage);
      logger.debug(`✅ 成功发送错误响应`);
    } catch (responseError) {
      logger.error(`❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
      console.log(`[multi-lane-manager] ❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
    }

    event.context._laneManagerHandled = true;
    return false;
  }
}

/**
 * Backward-compatible entry point: proxies to exactly one instance.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance instance to forward to
 * @param isDebugMode whether to attach debug headers
 * @returns `true` when the upstream response was relayed, `false` otherwise
 */
export async function proxyRequest(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false
): Promise<boolean> {
  // Single-instance path, no failover: keeps the original behaviour.
  return proxyRequestToInstance(event, targetInstance, isDebugMode, false);
}