/*
 * multi-lane-manager
 * Version:
 * Nacos lane management and request routing component
 * 605 lines (514 loc) • 21.8 kB
 * text/typescript
 */
import axios, { type Method as AxiosMethod, type AxiosRequestConfig, AxiosError } from 'axios';
import type { H3Event } from 'h3';
import { TLSSocket } from 'tls';
import type { ServiceInstanceInfo } from '../types';
import { getConfig } from './config';
import {
DEFAULT_LANE_TARGET_HEADER,
HEADER_PROXIED_BY,
HEADER_ORIGINAL_LANE,
HEADER_PROXIED_BY_VALUE,
HEADER_LANE_DEBUG,
HEADER_LANE_DETAIL,
getSafeHeaderValue
} from './defaults';
import { logger } from './logger';
import { getNacosLaneInstances } from './nacos';
/**
 * Load-balancing strategies.
 * Enumerates the instance-selection algorithms handled by `selectInstance`.
 */
export enum LoadBalanceStrategy {
RANDOM = 'random', // pick an instance at random
ROUND_ROBIN = 'round-robin', // cycle through the instances in order
LEAST_CONNECTIONS = 'least-connections', // pick the instance with the fewest tracked connections
}
// Per-service cursor (keyed by service name) used by the round-robin strategy.
const serviceIndices: Record<string, number> = {};
// Per-instance in-flight request counter (keyed by "serviceName:ip:port"),
// read by the least-connections strategy.
const instanceConnections: Record<string, number> = {};
// Per-instance failure bookkeeping (keyed by "serviceName:ip:port") used for failover:
// failure count, timestamp of the last failure, and the time until which the
// instance is considered blacklisted (both timestamps are Date.now() milliseconds).
const failedInstances: Record<string, { count: number; lastFailTime: number; blacklistUntil: number }> = {};
// Failover tuning constants.
const FAILURE_CONFIG = {
MAX_RETRY_ATTEMPTS: 2, // maximum number of backup instances added to the retry list
FAILURE_THRESHOLD: 3, // recorded failures after which an instance is blacklisted
BLACKLIST_DURATION: 30000, // how long a blacklisting lasts (30 seconds, in ms)
HEALTH_CHECK_INTERVAL: 60000, // health-check interval (60 seconds, in ms) — NOTE(review): not referenced in the visible part of this file
CONNECTION_TIMEOUT: 5000, // connection timeout (5 seconds, in ms) — NOTE(review): not referenced in the visible part of this file
RETRY_DELAY: 1000, // pause before each failover attempt (1 second, in ms)
};
/**
 * Choose one instance from a candidate list using the given load-balancing strategy.
 *
 * A single-element list short-circuits and never touches the round-robin cursor
 * or the connection counters.
 *
 * @param instances candidate instances
 * @param serviceName key under which the round-robin cursor is stored
 * @param strategy selection algorithm; defaults to round-robin
 * @returns the chosen instance
 * @throws Error when `instances` is empty
 */
export function selectInstance(
  instances: ServiceInstanceInfo[],
  serviceName: string,
  strategy: LoadBalanceStrategy = LoadBalanceStrategy.ROUND_ROBIN
): ServiceInstanceInfo {
  const total = instances.length;

  // Nothing to choose from.
  if (total === 0) {
    throw new Error('🚫 没有可用的服务实例');
  }
  // Only one candidate: no strategy needed.
  if (total === 1) {
    return instances[0];
  }

  if (strategy === LoadBalanceStrategy.RANDOM) {
    return instances[Math.floor(Math.random() * total)];
  }

  if (strategy === LoadBalanceStrategy.ROUND_ROBIN) {
    // A missing cursor starts at 0; the modulo guards against a list that
    // shrank since the cursor was last stored.
    const cursor = (serviceIndices[serviceName] || 0) % total;
    serviceIndices[serviceName] = (cursor + 1) % total;
    return instances[cursor];
  }

  if (strategy === LoadBalanceStrategy.LEAST_CONNECTIONS) {
    let fewest = Number.MAX_SAFE_INTEGER;
    let chosen = instances[0];
    for (const candidate of instances) {
      const key = `${candidate.serviceName}:${candidate.ip}:${candidate.port}`;
      const load = instanceConnections[key] || 0;
      // Strict comparison: on ties the earliest candidate wins.
      if (load < fewest) {
        fewest = load;
        chosen = candidate;
      }
    }
    return chosen;
  }

  // Unknown strategy: fall back to the first instance.
  return instances[0];
}
/**
 * Adjust the tracked connection count of an instance by one.
 * The counter feeds the least-connections strategy and never drops below zero.
 *
 * @param instance instance whose counter is adjusted
 * @param increment `true` to add a connection, `false` to release one
 */
function updateInstanceConnections(instance: ServiceInstanceInfo, increment: boolean): void {
  const key = `${instance.serviceName}:${instance.ip}:${instance.port}`;
  // A missing entry counts as zero.
  const current = instanceConnections[key] || 0;
  instanceConnections[key] = increment ? current + 1 : Math.max(0, current - 1);
}
/**
 * Tell whether an instance is currently blacklisted.
 *
 * @param instance instance to check
 * @returns `true` while the current time is before the instance's `blacklistUntil`;
 *          `false` when no failure record exists or the blacklisting has expired
 */
function isInstanceBlacklisted(instance: ServiceInstanceInfo): boolean {
  const record = failedInstances[`${instance.serviceName}:${instance.ip}:${instance.port}`];
  return record ? Date.now() < record.blacklistUntil : false;
}
/**
 * Register one failure for an instance and blacklist it once the threshold is reached.
 *
 * @param instance instance that failed
 * @param error the failure; only its message is used, for logging
 */
function recordInstanceFailure(instance: ServiceInstanceInfo, error: Error): void {
  const key = `${instance.serviceName}:${instance.ip}:${instance.port}`;
  const timestamp = Date.now();

  // Fetch the failure record, creating a blank one on first failure.
  const entry = failedInstances[key] ?? { count: 0, lastFailTime: 0, blacklistUntil: 0 };
  failedInstances[key] = entry;

  entry.count += 1;
  entry.lastFailTime = timestamp;

  // Below the threshold: just report progress towards blacklisting.
  if (entry.count < FAILURE_CONFIG.FAILURE_THRESHOLD) {
    logger.warn(`⚠️ 实例 ${key} 故障 (${entry.count}/${FAILURE_CONFIG.FAILURE_THRESHOLD}): ${error.message}`);
    return;
  }

  // Threshold reached: (re)start the blacklist window from now.
  entry.blacklistUntil = timestamp + FAILURE_CONFIG.BLACKLIST_DURATION;
  logger.warn(`⚠️ 实例 ${key} 故障次数达到阈值 ${FAILURE_CONFIG.FAILURE_THRESHOLD},加入黑名单 ${FAILURE_CONFIG.BLACKLIST_DURATION/1000} 秒`);
  logger.warn(`⚠️ 故障原因: ${error.message}`);
}
/**
 * Forget all recorded failures of an instance (called after a successful request).
 * Logs a recovery message when the instance was still blacklisted at that moment.
 *
 * @param instance instance that just served a request successfully
 */
function resetInstanceFailure(instance: ServiceInstanceInfo): void {
  const key = `${instance.serviceName}:${instance.ip}:${instance.port}`;
  if (!failedInstances[key]) {
    return;
  }

  // Capture the blacklist state before the record is removed.
  const wasBlacklisted = isInstanceBlacklisted(instance);
  delete failedInstances[key];

  if (wasBlacklisted) {
    logger.info(`✅ 实例 ${key} 已恢复,从黑名单中移除`);
  }
}
/**
 * Decide whether an error justifies trying another instance.
 *
 * Only `AxiosError`s qualify. Connection-level codes (ECONNREFUSED, ENOTFOUND,
 * ETIMEDOUT) always qualify; when a response is attached, only statuses
 * 502-504 qualify; otherwise an aborted request (ECONNABORTED) qualifies.
 *
 * @param error the error raised by a proxy attempt
 * @returns `true` when failover should be attempted
 */
function shouldFailover(error: Error): boolean {
  if (!(error instanceof AxiosError)) {
    return false;
  }

  // Connection-level failures.
  switch (error.code) {
    case 'ECONNREFUSED':
    case 'ENOTFOUND':
    case 'ETIMEDOUT':
      return true;
  }

  // An attached response decides by status code alone.
  if (error.response) {
    const { status } = error.response;
    return status >= 502 && status <= 504;
  }

  // No response: only an aborted/timed-out request qualifies.
  return error.code === 'ECONNABORTED';
}
/**
 * Keep only the instances that are not currently blacklisted.
 *
 * @param instances candidate instances
 * @returns a new array containing the non-blacklisted instances, in input order
 */
function filterHealthyInstances(instances: ServiceInstanceInfo[]): ServiceInstanceInfo[] {
  const isHealthy = (candidate: ServiceInstanceInfo): boolean => !isInstanceBlacklisted(candidate);
  return instances.filter(isHealthy);
}
/**
 * Proxy a request with failover across instances.
 *
 * The target instance is tried first, followed by up to
 * `FAILURE_CONFIG.MAX_RETRY_ATTEMPTS` non-blacklisted instances from
 * `allInstances` (the target itself excluded). Blacklisted instances are
 * skipped, and every failover attempt is preceded by a
 * `FAILURE_CONFIG.RETRY_DELAY` pause.
 *
 * An attempt can fail in two ways:
 *  - `proxyRequestToInstance` returns `false`. The failure is recorded, and if
 *    the client response has already been committed (headers sent or response
 *    ended) the loop stops, because nothing more can be written to the client.
 *  - The attempt throws. The failure is recorded and the next instance is
 *    tried only when `shouldFailover` accepts the error.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance preferred instance, always tried first
 * @param isDebugMode whether to attach debug headers to the response
 * @param allInstances all known instances, used to pick failover candidates
 * @returns `true` when some instance served the request, `false` otherwise
 */
export async function proxyRequestWithFailover(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false,
  allInstances: ServiceInstanceInfo[] = []
): Promise<boolean> {
  let lastError: Error | null = null;

  // The original target always goes first.
  const instancesToTry = [targetInstance];

  // Append healthy backups (every instance except the target itself).
  if (allInstances.length > 1) {
    const healthyBackups = filterHealthyInstances(
      allInstances.filter(inst =>
        inst.ip !== targetInstance.ip || inst.port !== targetInstance.port
      )
    );
    instancesToTry.push(...healthyBackups.slice(0, FAILURE_CONFIG.MAX_RETRY_ATTEMPTS));
  }
  logger.info(`🔄 开始代理请求,可尝试实例数: ${instancesToTry.length}`);

  for (let attempt = 0; attempt < instancesToTry.length; attempt++) {
    const currentInstance = instancesToTry[attempt];
    const isRetry = attempt > 0;

    // Blacklist state may have changed since the candidate list was built.
    if (isInstanceBlacklisted(currentInstance)) {
      logger.warn(`⚠️ 跳过黑名单实例: ${currentInstance.ip}:${currentInstance.port}`);
      continue;
    }

    try {
      if (isRetry) {
        logger.info(`🔄 故障转移到实例: ${currentInstance.ip}:${currentInstance.port} (尝试 ${attempt + 1}/${instancesToTry.length})`);
        // Back off briefly before hitting the next instance.
        await new Promise(resolve => setTimeout(resolve, FAILURE_CONFIG.RETRY_DELAY));
      }

      const success = await proxyRequestToInstance(event, currentInstance, isDebugMode, isRetry);
      if (success) {
        // The instance works again: clear its failure history.
        resetInstanceFailure(currentInstance);
        if (isRetry) {
          logger.info(`✅ 故障转移成功,使用实例: ${currentInstance.ip}:${currentInstance.port}`);
        }
        return true;
      }

      // A `false` result is a failure that proxyRequestToInstance handled
      // itself instead of throwing; it must still count against the instance.
      lastError = new Error(`实例 ${currentInstance.ip}:${currentInstance.port} 代理请求失败`);
      recordInstanceFailure(currentInstance, lastError);

      // Once anything has been written to the client the response cannot be
      // replaced, so proxying another instance into it would corrupt it.
      if (event.node.res.headersSent || event.node.res.writableEnded) {
        logger.warn(`⚠️ 响应已发送,无法继续故障转移`);
        event.context._laneManagerHandled = true;
        return false;
      }
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      recordInstanceFailure(currentInstance, lastError);

      // Only some error types are worth retrying elsewhere.
      if (!shouldFailover(lastError)) {
        logger.warn(`⚠️ 错误类型不适合故障转移: ${lastError.message}`);
        break;
      }
      logger.warn(`⚠️ 实例 ${currentInstance.ip}:${currentInstance.port} 请求失败: ${lastError.message}`);

      // Nothing left to try.
      if (attempt === instancesToTry.length - 1) {
        logger.error(`❌ 所有实例都已尝试,请求最终失败`);
        break;
      }
    }
  }

  // Every candidate was skipped or failed without committing a response.
  return handleFinalFailure(event, lastError, isDebugMode);
}
/**
 * Answer the client after every failover candidate has failed.
 *
 * Sends a 503 plain-text response unless the response headers have already
 * been sent, in which case nothing can be written and only a warning is
 * logged. In both cases the request is marked as handled via
 * `event.context._laneManagerHandled`.
 *
 * @param event H3 event of the incoming request
 * @param error last error seen during failover, or `null` when none was captured
 * @param isDebugMode whether to attach the debug detail header
 * @returns always `false`, since the request was not served successfully
 */
async function handleFinalFailure(event: H3Event, error: Error | null, isDebugMode: boolean): Promise<boolean> {
  // Headers already sent: the response can no longer be replaced.
  if (event.node.res.headersSent) {
    logger.warn(`⚠️ 响应头已发送,无法返回错误响应`);
    event.context._laneManagerHandled = true;
    return false;
  }

  try {
    // In debug mode, describe the failure in a response header.
    if (isDebugMode) {
      const debugInfo: string[] = [];
      debugInfo.push(`故障转移时间: ${new Date().toISOString()}`);
      debugInfo.push(`最终错误: ${error?.message || '所有实例都不可用'}`);
      debugInfo.push(`状态: 所有实例故障转移失败`);
      try {
        event.node.res.setHeader(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
      } catch (headerError) {
        // A bad debug header must not prevent the error response itself.
        logger.error(`❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
      }
    }

    event.node.res.statusCode = 503; // Service Unavailable
    // The body is non-ASCII text, so the charset is declared explicitly;
    // without it clients may decode the message with the wrong encoding.
    event.node.res.setHeader('Content-Type', 'text/plain; charset=utf-8');

    const errorMessage = `服务暂时不可用,所有实例都无法访问。请稍后重试。`;
    event.node.res.end(errorMessage);
    logger.error(`❌ 发送服务不可用响应`);
  } catch (responseError) {
    logger.error(`❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
  }

  // Mark the request as handled.
  event.context._laneManagerHandled = true;
  return false;
}
/**
 * Forward the current request to one specific instance and stream the
 * instance's response back to the client.
 *
 * Failures are not thrown to the caller: they are logged, answered with a
 * 502 plain-text response when the headers have not been sent yet, and
 * reported through the `false` return value. In every outcome handled here
 * the request is marked as handled via `event.context._laneManagerHandled`.
 *
 * The instance's tracked connection count is incremented before the attempt
 * and released exactly once when the attempt finishes, whatever the outcome.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance instance to forward the request to
 * @param isDebugMode whether to attach the debug detail header
 * @param isRetry whether this call is a failover attempt (not read in this body)
 * @returns `true` when the upstream response was fully streamed, `false` on failure
 */
async function proxyRequestToInstance(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false,
  isRetry: boolean = false
): Promise<boolean> {
  const config = getConfig();
  const requestPath = event.node.req.url || '';
  const targetUrl = `http://${targetInstance.ip}:${targetInstance.port}${requestPath}`;
  logger.info(`🔄 代理请求到目标实例: ${targetInstance.serviceName}@${targetInstance.ip}:${targetInstance.port} (泳道: ${targetInstance.metadata.laneId}), URL: ${targetUrl}`);

  // Count this request as an open connection; released in `finally` below.
  updateInstanceConnections(targetInstance, true);
  try {
    // Build the outgoing headers: copy the incoming ones except `host` and
    // the configured target-lane header.
    const newHeaders: Record<string, string | string[] | undefined> = {};
    for (const key in event.node.req.headers) {
      if (key !== 'host' && key !== config.targetLaneHeaderKey) {
        newHeaders[key] = event.node.req.headers[key];
      }
    }

    // Forwarding headers.
    const remoteAddress = event.node.req.socket?.remoteAddress;
    newHeaders['X-Forwarded-For'] = remoteAddress || event.node.req.headers['x-forwarded-for'] || '';
    newHeaders['X-Forwarded-Host'] = event.node.req.headers['host'] || '';
    // Protocol: trust an incoming x-forwarded-proto, otherwise derive it from the socket.
    const proto = event.node.req.headers['x-forwarded-proto'] ||
      (event.node.req.socket instanceof TLSSocket && event.node.req.socket.encrypted ? 'https' : 'http');
    newHeaders['X-Forwarded-Proto'] = proto;

    // Request configuration.
    const proxyRequestConfig: AxiosRequestConfig = {
      method: event.node.req.method as AxiosMethod,
      url: targetUrl,
      headers: newHeaders,
      responseType: 'stream',
      validateStatus: () => true, // accept any status code
      timeout: config.proxyTimeout,
    };

    // For methods that may carry a body, pass the incoming request stream through.
    const bodyMethods = ['POST', 'PUT', 'PATCH', 'DELETE'];
    if (event.node.req.method && bodyMethods.includes(event.node.req.method.toUpperCase())) {
      proxyRequestConfig.data = event.node.req;
    }
    logger.debug('📋 代理请求配置:', {
      ...proxyRequestConfig,
      data: proxyRequestConfig.data ? '<REQUEST_STREAM>' : undefined,
    });

    // Send the proxied request.
    const proxyRes = await axios.request(proxyRequestConfig);
    logger.info(`📥 代理响应状态: ${proxyRes.status}`);

    // Relay the upstream response; settles when the upstream stream ends or errors.
    await new Promise<void>((resolve, reject) => {
      try {
        // 1. Status code.
        event.node.res.statusCode = proxyRes.status;

        // 2. Response headers: upstream headers first, then the proxy's own.
        const headersToSet = new Map<string, string | string[] | number>();
        Object.entries(proxyRes.headers).forEach(([key, value]) => {
          if (value !== undefined && value !== null) {
            headersToSet.set(key, value as string | string[] | number);
          }
        });
        headersToSet.set(HEADER_PROXIED_BY, HEADER_PROXIED_BY_VALUE);
        headersToSet.set(HEADER_ORIGINAL_LANE, config.currentLaneId);
        headersToSet.set(DEFAULT_LANE_TARGET_HEADER, targetInstance.metadata.laneId);

        // In debug mode, describe the proxying in a response header.
        if (isDebugMode) {
          const debugInfo: string[] = [];
          debugInfo.push(`代理时间: ${new Date().toISOString()}`);
          debugInfo.push(`代理目标: ${targetInstance.ip}:${targetInstance.port}`);
          debugInfo.push(`目标泳道: ${targetInstance.metadata.laneId}`);
          debugInfo.push(`响应状态: ${proxyRes.status}`);
          debugInfo.push(`响应大小: ${proxyRes.headers['content-length'] || '未知'}`);
          try {
            headersToSet.set(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
          } catch (error) {
            logger.error(`❌ 设置调试响应头时出错: ${error instanceof Error ? error.message : String(error)}`);
          }
        }

        // Note: no lane-ID cookie is set here; only cookies already present in
        // the upstream response headers are passed along.
        if (event.node.res.headersSent) {
          logger.warn(`⚠️ 响应头已发送,无法设置新的响应头`);
          console.log(`[multi-lane-manager] ⚠️ 响应头已发送,无法设置新的响应头`);
        }

        // Apply the headers one by one so a single bad header does not abort the response.
        for (const [key, value] of headersToSet.entries()) {
          try {
            if (!event.node.res.headersSent) {
              event.node.res.setHeader(key, value);
              logger.debug(`✅ 成功设置响应头: ${key}=${value}`);
            } else {
              logger.warn(`⚠️ 无法设置响应头 ${key},响应头已发送`);
              console.log(`[multi-lane-manager] ⚠️ 无法设置响应头 ${key},响应头已发送`);
            }
          } catch (headerError) {
            logger.warn(`⚠️ 设置响应头 ${key} 失败: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
            console.log(`[multi-lane-manager] ⚠️ 设置响应头 ${key} 失败: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
          }
        }

        // 3. Stream wiring. The connection counter is NOT touched in these
        // handlers: the `finally` block releases it once for every outcome, so
        // a stream error no longer decrements it twice.
        proxyRes.data.on('error', (err: Error) => {
          logger.error(`❌ 代理响应数据流错误: ${err.message}`);
          reject(err);
        });
        proxyRes.data.on('end', () => {
          logger.debug('✅ 代理响应数据流结束');
          resolve();
        });

        // Mark the request as handled so it is not processed again.
        event.context._laneManagerHandled = true;

        // Start streaming the body only after all headers have been set.
        proxyRes.data.pipe(event.node.res);
      } catch (err) {
        reject(err);
      }
    });

    // The request was marked as handled while wiring the stream.
    return true;
  } catch (error) {
    logger.error(`❌ 代理请求到 ${targetUrl} 失败: ${error instanceof Error ? error.message : String(error)}`);

    // Headers already sent: the response can no longer be replaced.
    if (event.node.res.headersSent) {
      logger.warn(`⚠️ 响应头已发送,无法返回错误响应`);
      console.log(`[multi-lane-manager] ⚠️ 响应头已发送,无法返回错误响应`);
      event.context._laneManagerHandled = true;
      return false;
    }

    // Headers not sent yet: answer with an error response.
    try {
      // In debug mode, describe the failure in a response header.
      if (isDebugMode) {
        const debugInfo: string[] = [];
        debugInfo.push(`代理时间: ${new Date().toISOString()}`);
        debugInfo.push(`代理目标: ${targetInstance.ip}:${targetInstance.port}`);
        debugInfo.push(`目标泳道: ${targetInstance.metadata.laneId}`);
        debugInfo.push(`错误: ${error instanceof Error ? error.message : '未知代理错误'}`);
        try {
          event.node.res.setHeader(HEADER_LANE_DETAIL, getSafeHeaderValue(debugInfo));
          logger.debug(`✅ 成功设置错误响应头: ${HEADER_LANE_DETAIL}`);
        } catch (headerError) {
          logger.error(`❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
          console.log(`[multi-lane-manager] ❌ 设置调试响应头时出错: ${headerError instanceof Error ? headerError.message : String(headerError)}`);
        }
      }

      event.node.res.statusCode = 502; // Bad Gateway
      // The body is non-ASCII text, so the charset is declared explicitly;
      // without it clients may decode the message with the wrong encoding.
      event.node.res.setHeader('Content-Type', 'text/plain; charset=utf-8');

      const errorMessage = `代理请求到泳道 ${targetInstance.metadata.laneId} 时出错。原因: ${error instanceof Error ? error.message : '未知代理错误'}`;
      event.node.res.end(errorMessage);
      logger.debug(`✅ 成功发送错误响应`);
    } catch (responseError) {
      logger.error(`❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
      console.log(`[multi-lane-manager] ❌ 发送错误响应时出错: ${responseError instanceof Error ? responseError.message : String(responseError)}`);
    }

    // Mark the request as handled.
    event.context._laneManagerHandled = true;
    return false;
  } finally {
    // Release the connection slot exactly once, on success and on every failure path.
    updateInstanceConnections(targetInstance, false);
  }
}
/**
 * Backward-compatible entry point: proxy a request to a single instance.
 *
 * Delegates straight to `proxyRequestToInstance` with `isRetry = false`.
 * No failover is performed and no other instance is tried.
 *
 * @param event H3 event of the incoming request
 * @param targetInstance instance to forward the request to
 * @param isDebugMode whether to attach the debug detail header
 * @returns `true` when the request was proxied successfully, `false` otherwise
 */
export async function proxyRequest(
  event: H3Event,
  targetInstance: ServiceInstanceInfo,
  isDebugMode: boolean = false
): Promise<boolean> {
  const handled = await proxyRequestToInstance(event, targetInstance, isDebugMode, false);
  return handled;
}