lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
315 lines (268 loc) • 11.2 kB
JavaScript
const express = require("express");
const compression = require("compression");
const config = require("./config");
const loggingMiddleware = require("./api/middleware/logging");
const router = require("./api/router");
const { sessionMiddleware } = require("./api/middleware/session");
const { budgetMiddleware } = require("./api/middleware/budget");
const { metricsMiddleware } = require("./api/middleware/metrics");
const { requestLoggingMiddleware } = require("./api/middleware/request-logging");
const { errorHandlingMiddleware, notFoundHandler } = require("./api/middleware/error-handling");
const { loadSheddingMiddleware, initializeLoadShedder } = require("./api/middleware/load-shedding");
const { tenantMiddleware } = require("./api/middleware/tenant");
const { budgetEnforcer } = require("./api/middleware/budget-enforcer");
const { livenessCheck, readinessCheck } = require("./api/health");
const { getMetricsCollector } = require("./observability/metrics");
const { getShutdownManager } = require("./server/shutdown");
const { getCircuitBreakerRegistry } = require("./clients/circuit-breaker");
const metrics = require("./metrics");
const logger = require("./logger");
const { initialiseMcp } = require("./mcp");
const { registerStubTools } = require("./tools/stubs");
const { registerWorkspaceTools } = require("./tools/workspace");
const { registerExecutionTools } = require("./tools/execution");
const { registerWebTools } = require("./tools/web");
const { registerIndexerTools } = require("./tools/indexer");
const { registerEditTools } = require("./tools/edits");
const { registerGitTools } = require("./tools/git");
const { registerTaskTools } = require("./tools/tasks");
const { registerTestTools } = require("./tools/tests");
const { registerMcpTools } = require("./tools/mcp");
const { registerAgentTaskTool } = require("./tools/agent-task");
const { initConfigWatcher, getConfigWatcher } = require("./config/watcher");
const { initializeHeadroom, shutdownHeadroom, getHeadroomManager } = require("./headroom");
const { getWorkerPool, isWorkerPoolReady } = require("./workers/pool");
const lazyLoader = require("./tools/lazy-loader");
const { setLazyLoader } = require("./tools");
const { waitForOllama } = require("./clients/ollama-startup");
// Initialize MCP
initialiseMcp();
// Set up lazy tool loading
setLazyLoader(lazyLoader);
// Check if lazy loading is enabled (default: true)
const LAZY_TOOLS_ENABLED = process.env.LAZY_TOOLS_ENABLED !== "false";
if (LAZY_TOOLS_ENABLED) {
// Only load core tools at startup (stubs, workspace, execution)
lazyLoader.loadCoreTools();
logger.info({ mode: "lazy" }, "Lazy tool loading enabled - other tools will load on demand");
} else {
// Backwards compatibility: load all tools at startup
registerStubTools();
registerWorkspaceTools();
registerExecutionTools();
registerWebTools();
registerIndexerTools();
registerEditTools();
registerGitTools();
registerTaskTools();
registerTestTools();
registerMcpTools();
registerAgentTaskTool();
logger.info({ mode: "eager" }, "All tools loaded at startup");
}
function createApp() {
const app = express();
const path = require('path');
const fs = require('fs');
// Dashboard — registered first so it is never shadowed by the main router
const DASHBOARD_HTML = path.resolve(__dirname, '../public/dashboard.html');
app.get('/dashboard', (_req, res) => {
try {
const html = fs.readFileSync(DASHBOARD_HTML, 'utf8');
res.setHeader('Content-Type', 'text/html; charset=utf-8');
res.send(html);
} catch (e) {
res.status(500).json({ error: 'dashboard_read_failed', path: DASHBOARD_HTML, detail: e.message });
}
});
app.get('/dashboard/api/overview', require('./dashboard/api').overview);
app.get('/dashboard/api/usage', require('./dashboard/api').usage);
app.get('/dashboard/api/routing', require('./dashboard/api').routing);
app.get('/dashboard/api/logs', require('./dashboard/api').logs);
// Initialize load shedder (log configuration)
initializeLoadShedder();
// Load shedding (protect against overload)
app.use(loadSheddingMiddleware);
// Request logging (add request IDs, structured logs)
app.use(requestLoggingMiddleware);
// Metrics collection
app.use(metricsMiddleware);
// Note: If using a tunnel (ngrok, Cloudflare Tunnel) and seeing BrotliDecompressionError,
// start ngrok with: ngrok http 8081 --request-header-remove "Accept-Encoding"
app.use(express.json({ limit: config.server.jsonLimit }));
app.use(sessionMiddleware);
app.use(loggingMiddleware);
// Budget and rate limiting (can be disabled via config)
if (config.budget?.enabled !== false) {
app.use('/v1/messages', budgetMiddleware);
}
// Phase 6.1 — per-tenant routing policies (LYNKR-Tenant-Id header).
// Runs before message handling so res.locals.tenantPolicy is populated.
app.use('/v1/messages', tenantMiddleware);
// Phase 6.2 — hierarchical budget enforcement (LYNKR_BUDGET_ENFORCER=false to disable).
app.use('/v1/messages', budgetEnforcer);
// Health check endpoints
app.get("/health/live", livenessCheck);
app.get("/health/ready", readinessCheck);
// Metrics endpoints
app.get("/metrics", (req, res) => {
res.json(metrics.snapshot());
});
app.get("/metrics/observability", (req, res) => {
const metricsCollector = getMetricsCollector();
res.json(metricsCollector.getMetrics());
});
app.get("/metrics/prometheus", (req, res) => {
const metricsCollector = getMetricsCollector();
res.set("Content-Type", "text/plain");
res.send(metricsCollector.toPrometheus());
});
app.get("/metrics/circuit-breakers", (req, res) => {
const registry = getCircuitBreakerRegistry();
res.json(registry.getAll());
});
app.get("/metrics/load-shedding", (req, res) => {
const { getLoadShedder } = require("./api/middleware/load-shedding");
const shedder = getLoadShedder();
res.json(shedder.getMetrics());
});
app.get("/metrics/worker-pool", (req, res) => {
if (!isWorkerPoolReady()) {
return res.json({ enabled: false, message: "Worker pool not initialized" });
}
const pool = getWorkerPool();
res.json({ enabled: true, ...pool.getStats() });
});
app.get("/metrics/semantic-cache", (req, res) => {
const { getSemanticCache, isSemanticCacheEnabled } = require("./cache/semantic");
if (!isSemanticCacheEnabled()) {
return res.json({ enabled: false, message: "Semantic cache not enabled" });
}
const cache = getSemanticCache();
res.json({ enabled: true, ...cache.getStats() });
});
app.get("/metrics/lazy-tools", (req, res) => {
res.json({
enabled: LAZY_TOOLS_ENABLED,
...lazyLoader.getLoaderStats(),
});
});
app.use(router);
// Dashboard UI
app.use('/dashboard', require('./dashboard/router'));
// Files API
const filesRouter = require("./api/files-router");
app.use("/v1", filesRouter);
// 404 handler (must be after all routes)
app.use(notFoundHandler);
// Error handler (must be last)
app.use(errorHandlingMiddleware);
return app;
}
async function start() {
// Initialize Worker Thread Pool (if enabled)
// This pre-warms worker threads for CPU-intensive tasks
if (config.workerPool?.enabled !== false) {
try {
const poolOptions = {
size: config.workerPool?.size || undefined, // undefined = auto
taskTimeout: config.workerPool?.taskTimeoutMs || 5000,
offloadThreshold: config.workerPool?.offloadThresholdBytes || 10000,
};
const pool = getWorkerPool(poolOptions);
await pool.initialize();
logger.info({ poolSize: pool.size }, "Worker thread pool initialized");
} catch (err) {
logger.error({ err }, "Worker pool initialization failed, continuing without worker threads");
}
}
// Initialize Headroom sidecar (if enabled)
// This must happen before the server starts accepting requests
if (config.headroom?.enabled) {
try {
const result = await initializeHeadroom();
if (result.success) {
logger.info("Headroom sidecar initialized");
} else {
logger.warn({ error: result.error }, "Headroom initialization failed, continuing without compression");
}
} catch (err) {
logger.error({ err }, "Headroom initialization error, continuing without compression");
}
}
const app = createApp();
// Wait for Ollama if it's the configured provider or referenced in tier config
const provider = config.modelProvider?.type?.toLowerCase();
if (provider === "ollama" || config.tiersReferenceOllama()) {
await waitForOllama();
// Pre-probe Ollama's Anthropic API at startup (avoids 1-3s cold-start on first request)
try {
const { hasAnthropicEndpoint } = require("./clients/ollama-utils");
await hasAnthropicEndpoint(config.ollama.endpoint);
} catch (err) {
logger.debug({ err: err.message }, "Ollama Anthropic endpoint probe failed at startup");
}
}
const server = app.listen(config.port, () => {
console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`);
});
// Start session cleanup manager
const { getSessionCleanupManager } = require("./sessions/cleanup");
const sessionCleanup = getSessionCleanupManager();
sessionCleanup.start();
// Setup graceful shutdown
const shutdownManager = getShutdownManager();
shutdownManager.registerServer(server);
shutdownManager.setupSignalHandlers();
// Register Headroom shutdown callback
if (config.headroom?.enabled) {
shutdownManager.onShutdown(async () => {
logger.info("Stopping Headroom sidecar on shutdown");
await shutdownHeadroom(false); // Don't remove container on shutdown
});
}
// Register Worker Pool shutdown callback
if (config.workerPool?.enabled !== false && isWorkerPoolReady()) {
shutdownManager.onShutdown(async () => {
logger.info("Stopping worker thread pool on shutdown");
const pool = getWorkerPool();
await pool.shutdown();
});
}
// Register Codex process shutdown callback
shutdownManager.onShutdown(async () => {
try {
const { getCodexProcess } = require("./clients/codex-process");
const codex = getCodexProcess();
if (codex.child) {
await codex.shutdown();
}
} catch { /* ignore if codex never started */ }
});
// Initialize hot reload config watcher
if (config.hotReload?.enabled !== false) {
const watcher = initConfigWatcher({
paths: [".env"],
debounceMs: config.hotReload?.debounceMs || 1000,
enabled: true,
});
watcher.on("change", (filepath) => {
try {
config.reloadConfig();
logger.info({ filepath }, "Configuration hot-reloaded successfully");
} catch (err) {
logger.error({ error: err.message, filepath }, "Failed to hot-reload configuration");
}
});
// Stop watcher on shutdown
shutdownManager.onShutdown(() => {
const w = getConfigWatcher();
if (w) w.stop();
});
}
return server;
}
module.exports = {
createApp,
start,
};