lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
83 lines (72 loc) • 2.77 kB
JavaScript
/**
* Cluster Mode — Multi-Core Scaling
*
* Forks one worker per CPU core. Each worker runs a full Lynkr
* instance with its own Express server, event loop, and connection pool.
*
* Enable: CLUSTER_ENABLED=true (default: false for dev, recommended for prod)
* Workers: CLUSTER_WORKERS=auto (default) or a number
*
* Architecture:
* Primary process → forks N workers → each worker calls start()
* Primary handles: signal forwarding, worker respawning, health monitoring
* Workers handle: HTTP requests, LLM proxying, tool execution
*
* Shared state considerations:
* - SQLite: WAL mode supports concurrent readers across processes
* - In-memory caches (prompt, circuit breaker): per-worker (not shared)
* - Rate limiting: per-worker (sessions are sticky via round-robin)
*
* @module cluster
*/
const cluster = require('node:cluster');
const os = require('node:os');
const WORKER_COUNT = (() => {
const env = process.env.CLUSTER_WORKERS;
if (!env || env === 'auto') return Math.max(os.cpus().length - 1, 1);
const n = parseInt(env, 10);
return Number.isNaN(n) || n < 1 ? Math.max(os.cpus().length - 1, 1) : n;
})();
function startCluster() {
if (cluster.isPrimary) {
console.log(`[cluster] Primary ${process.pid} starting ${WORKER_COUNT} workers`);
// Fork workers
for (let i = 0; i < WORKER_COUNT; i++) {
cluster.fork();
}
// Respawn crashed workers
cluster.on('exit', (worker, code, signal) => {
if (signal) {
console.log(`[cluster] Worker ${worker.process.pid} killed by signal ${signal}`);
} else if (code !== 0) {
console.log(`[cluster] Worker ${worker.process.pid} exited with code ${code}, respawning...`);
cluster.fork();
} else {
console.log(`[cluster] Worker ${worker.process.pid} exited cleanly`);
}
});
// Forward SIGTERM/SIGINT to all workers for graceful shutdown
const shutdown = (sig) => {
console.log(`[cluster] Primary received ${sig}, shutting down workers...`);
for (const id in cluster.workers) {
cluster.workers[id].process.kill(sig);
}
// Give workers 10s to drain, then force exit
setTimeout(() => {
console.log('[cluster] Force exit after 10s drain timeout');
process.exit(0);
}, 10000).unref();
};
process.on('SIGTERM', () => shutdown('SIGTERM'));
process.on('SIGINT', () => shutdown('SIGINT'));
// Log worker status
cluster.on('online', (worker) => {
console.log(`[cluster] Worker ${worker.process.pid} online`);
});
} else {
// Worker process — start the normal Lynkr server
const { start } = require('./server');
start();
}
}
module.exports = { startCluster, WORKER_COUNT };