lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
247 lines (224 loc) • 7.11 kB
JavaScript
/**
* LinUCB contextual bandit for intra-tier model selection (Phase 4.1).
*
* Standard LinUCB-with-disjoint-models algorithm (Li et al. 2010).
* - One arm per (provider, model) pair in a tier
* - Context = numerical feature vector for the request
* - Reward = quality_score - λ·norm_cost - μ·norm_latency
* - Per-arm A (d×d ridge-regression matrix) and b (d-vector) stored to disk
*
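* State persists to data/bandit-state.json. Loaded on startup; saved on
* every 25th `update()` call (cheap — small matrices). This module registers
* no shutdown hook itself, so a final save on graceful shutdown has to be
* triggered by the caller.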
*/
const fs = require('fs');
const path = require('path');
const logger = require('../logger');
// Location of the persisted bandit state, resolved relative to this file.
const STATE_PATH = path.join(__dirname, '../../data/bandit-state.json');
// Default multiplier on the confidence term of the UCB score computed in pick().
const DEFAULT_ALPHA = 1.5;
const DEFAULT_LAMBDA = 0.3; // cost penalty weight
const DEFAULT_MU = 0.1; // latency penalty weight
// Default length of the context feature vector, and of each arm's A rows and b.
const FEATURE_DIM = 12;
// Probability that pick() skips the UCB scores and returns a uniformly random candidate.
const EXPLORATION_RATE = 0.05;
/**
 * Build a fresh d×d identity matrix as an array of independent row arrays.
 *
 * @param {number} d - matrix size
 * @returns {number[][]} rows of zeros with a 1 on the diagonal
 */
function _identity(d) {
  const rows = new Array(d);
  let r = 0;
  while (r < d) {
    const row = new Array(d).fill(0);
    row[r] = 1;
    rows[r] = row;
    r += 1;
  }
  return rows;
}
/**
 * Build a fresh vector of d zeros.
 *
 * @param {number} d - vector length
 * @returns {number[]} array of length d filled with 0
 */
function _zeros(d) {
  const vec = new Array(d);
  vec.fill(0);
  return vec;
}
/**
 * Multiply the leading n×n part of M by v, where n is v.length.
 *
 * @param {number[][]} M - matrix with at least n rows and n columns
 * @param {number[]} v - vector of length n
 * @returns {number[]} new vector M·v of length n
 */
function _matVec(M, v) {
  const n = v.length;
  const product = [];
  for (let r = 0; r < n; r += 1) {
    // Accumulate left to right from 0 so the floating-point sum order is fixed.
    let acc = 0;
    for (let c = 0; c < n; c += 1) {
      acc += M[r][c] * v[c];
    }
    product.push(acc);
  }
  return product;
}
/**
 * Inner product of two vectors, summed over the indices of `a`.
 *
 * @param {number[]} a
 * @param {number[]} b - read at the same indices as `a`
 * @returns {number} Σ a[i]·b[i]
 */
function _dot(a, b) {
  let total = 0;
  for (const [idx, value] of a.entries()) {
    total += value * b[idx];
  }
  return total;
}
/**
 * Outer product a·bᵀ.
 *
 * @param {number[]} a - length m
 * @param {number[]} b - length n
 * @returns {number[][]} new m×n matrix with entry [i][j] = a[i]·b[j]
 */
function _outer(a, b) {
  return Array.from(a, (left) => Array.from(b, (right) => left * right));
}
/**
 * In-place matrix addition: A += B. Nothing is returned.
 *
 * @param {number[][]} A - accumulator; its rows are mutated
 * @param {number[][]} B - addend, read at every position A has
 */
function _addMat(A, B) {
  for (const [r, targetRow] of A.entries()) {
    for (let c = 0; c < targetRow.length; c += 1) {
      targetRow[c] += B[r][c];
    }
  }
}
/**
 * In-place vector addition: a += b. Nothing is returned.
 *
 * @param {number[]} a - accumulator; mutated
 * @param {number[]} b - addend, read at every index of `a`
 */
function _addVec(a, b) {
  for (const idx of a.keys()) {
    a[idx] += b[idx];
  }
}
/**
 * Invert a small dense square matrix via Gauss-Jordan elimination with
 * partial pivoting. For d=12 this is plenty fast and saves us a dependency
 * on a linear algebra library.
 *
 * For each column the row with the largest absolute entry is used as the
 * pivot, so a small-but-nonzero diagonal entry is never divided through when
 * a better-conditioned row is available.
 *
 * @param {number[][]} M - square matrix; it is not modified
 * @returns {number[][]} a new matrix holding the inverse of M
 * @throws {Error} 'matrix must be square' when a row's length differs from
 *   the row count; 'matrix singular' when no usable pivot exists for some
 *   column (all candidates below 1e-12 in magnitude, or a non-finite pivot)
 */
function _inv(M) {
  const SINGULAR_EPS = 1e-12;
  const d = M.length;

  // Augmented matrix [M | I]; row reduction turns it into [I | M⁻¹].
  const aug = M.map((row, i) => {
    if (row.length !== d) throw new Error('matrix must be square');
    const r = row.slice();
    for (let j = 0; j < d; j++) r.push(i === j ? 1 : 0);
    return r;
  });

  for (let i = 0; i < d; i++) {
    // Partial pivoting: pick the largest |entry| in column i at or below row i.
    let pivotRow = i;
    let pivotAbs = Math.abs(aug[i][i]);
    for (let k = i + 1; k < d; k++) {
      const candidate = Math.abs(aug[k][i]);
      if (candidate > pivotAbs) {
        pivotRow = k;
        pivotAbs = candidate;
      }
    }
    // A NaN/Infinity pivot can only produce a garbage inverse; report it the
    // same way as a singular matrix so callers' existing handling applies.
    if (!Number.isFinite(pivotAbs) || pivotAbs < SINGULAR_EPS) {
      throw new Error('matrix singular');
    }
    if (pivotRow !== i) {
      [aug[i], aug[pivotRow]] = [aug[pivotRow], aug[i]];
    }

    const pivot = aug[i][i];
    for (let j = 0; j < 2 * d; j++) aug[i][j] /= pivot;

    // Clear column i from every other row.
    for (let k = 0; k < d; k++) {
      if (k === i) continue;
      const factor = aug[k][i];
      for (let j = 0; j < 2 * d; j++) aug[k][j] -= factor * aug[i][j];
    }
  }

  // The right-hand half now holds the inverse.
  return aug.map((row) => row.slice(d));
}
/**
 * LinUCB contextual bandit with one disjoint ridge-regression model per arm.
 *
 * An arm is keyed by (tier, provider, model). Each arm keeps `A` (d×d, starts
 * as the identity), `b` (d-vector, starts at zero) and an update `count`.
 * State is read from disk in the constructor and written back on every 25th
 * `update()`.
 */
class LinUCBBandit {
  /**
   * @param {object} [options]
   * @param {number} [options.alpha] - multiplier on the confidence width in the UCB score
   * @param {number} [options.lambda] - cost penalty weight; stored and persisted, not read by this class
   * @param {number} [options.mu] - latency penalty weight; stored and persisted, not read by this class
   * @param {number} [options.dim] - length of the context feature vector
   */
  constructor({ alpha = DEFAULT_ALPHA, lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU, dim = FEATURE_DIM } = {}) {
    this.alpha = alpha;
    this.lambda = lambda;
    this.mu = mu;
    this.dim = dim;
    /** arms: Map<armKey, { A: number[][], b: number[], count: number }> */
    this.arms = new Map();
    this.steps = 0;
    this._load();
  }

  /** Build the map key identifying one arm. */
  _armKey(tier, provider, model) {
    return `${tier}|${provider}:${model}`;
  }

  /** Return the arm stored under `armKey`, creating a fresh one (A = I, b = 0) if absent. */
  _ensureArm(armKey) {
    if (!this.arms.has(armKey)) {
      this.arms.set(armKey, { A: _identity(this.dim), b: _zeros(this.dim), count: 0 });
    }
    return this.arms.get(armKey);
  }

  /**
   * Copy `context` into a fresh plain array of exactly `this.dim` numbers.
   * Extra entries are dropped, missing entries are 0, and entries that do not
   * coerce to a finite number are replaced by 0 so a single bad feature cannot
   * put NaN into an arm's matrices. The caller's array is never modified.
   *
   * @param {ArrayLike<number>|null|undefined} context - array or typed array; anything else counts as empty
   * @returns {number[]}
   */
  _normalizeContext(context) {
    const source = Array.isArray(context) || ArrayBuffer.isView(context) ? context : [];
    const out = new Array(this.dim).fill(0);
    const limit = Math.min(source.length, this.dim);
    for (let i = 0; i < limit; i++) {
      const value = Number(source[i]);
      if (Number.isFinite(value)) out[i] = value;
    }
    return out;
  }

  /**
   * Check that a deserialized arm has a dim×dim `A` and a dim-length `b` made
   * only of finite numbers. (JSON stores NaN/Infinity as null, so a poisoned
   * arm shows up here as null entries.)
   *
   * @param {*} arm
   * @returns {boolean}
   */
  _isValidArm(arm) {
    if (!arm || !Array.isArray(arm.A) || arm.A.length !== this.dim) return false;
    const isFiniteVector = (vec) =>
      Array.isArray(vec) && vec.length === this.dim && vec.every((x) => Number.isFinite(x));
    return isFiniteVector(arm.b) && arm.A.every(isFiniteVector);
  }

  /**
   * Pick an arm for a given tier and context.
   * @param {string} tier
   * @param {Array<{ provider: string, model: string }>} candidates — qualifying arms
   * @param {number[]} context — feature vector; padded/truncated to `dim`
   * @returns {{ provider, model, ucb, explored }|null} chosen arm, or null when
   *   there are no candidates or no candidate could be scored
   */
  pick(tier, candidates, context) {
    if (!candidates || candidates.length === 0) return null;

    // ε-greedy: 5% pure exploration
    if (Math.random() < EXPLORATION_RATE) {
      const random = candidates[Math.floor(Math.random() * candidates.length)];
      return { ...random, ucb: null, explored: true };
    }

    const ctx = this._normalizeContext(context);
    let best = null;
    let bestUcb = -Infinity;
    for (const c of candidates) {
      const key = this._armKey(tier, c.provider, c.model);
      const arm = this._ensureArm(key);
      let Ainv;
      try {
        Ainv = _inv(arm.A);
      } catch (err) {
        // An arm whose A cannot be inverted cannot be scored; leave it out of
        // this round but make the reason visible.
        logger.debug({ arm: key, err: err.message }, '[Bandit] Arm skipped: A not invertible');
        continue;
      }
      const theta = _matVec(Ainv, arm.b);
      const mean = _dot(theta, ctx);
      const variance = _dot(ctx, _matVec(Ainv, ctx));
      // Clamp at 0: rounding can push the quadratic form slightly negative.
      const ucb = mean + this.alpha * Math.sqrt(Math.max(0, variance));
      if (ucb > bestUcb) {
        bestUcb = ucb;
        best = { ...c, ucb, explored: false };
      }
    }
    return best;
  }

  /**
   * Update the chosen arm with the observed reward.
   * @param {string} tier
   * @param {string} provider
   * @param {string} model
   * @param {number[]} context — feature vector; padded/truncated to `dim`
   * @param {number} reward — typically in [0, 100]; rescaled to [0, 1] and
   *   clamped internally. A reward that is not a number (NaN, undefined) is
   *   ignored entirely, because it would otherwise turn the arm's `b` into NaN.
   */
  update(tier, provider, model, context, reward) {
    const r = Math.max(0, Math.min(1, reward / 100));
    if (Number.isNaN(r)) {
      logger.debug({ tier, provider, model }, '[Bandit] Update ignored: reward is not a number');
      return;
    }

    const arm = this._ensureArm(this._armKey(tier, provider, model));
    const ctx = this._normalizeContext(context);
    _addMat(arm.A, _outer(ctx, ctx));
    _addVec(arm.b, ctx.map((x) => x * r));
    arm.count++;
    this.steps++;
    // Save periodically (not every step to limit IO)
    if (this.steps % 25 === 0) this._save();
  }

  /**
   * Write the current state to STATE_PATH. The JSON goes to a sibling temp
   * file first and is then renamed into place, so an interrupted write cannot
   * leave a truncated state file behind. Failures are logged, never thrown.
   */
  _save() {
    try {
      fs.mkdirSync(path.dirname(STATE_PATH), { recursive: true });
      const arms = {};
      for (const [k, v] of this.arms) arms[k] = v;
      const payload = JSON.stringify({
        savedAt: Date.now(),
        steps: this.steps,
        alpha: this.alpha,
        lambda: this.lambda,
        mu: this.mu,
        dim: this.dim,
        arms,
      });
      const tmpPath = `${STATE_PATH}.tmp`;
      fs.writeFileSync(tmpPath, payload);
      fs.renameSync(tmpPath, STATE_PATH);
    } catch (err) {
      logger.debug({ err: err.message }, '[Bandit] State save failed');
    }
  }

  /**
   * Read state from STATE_PATH if it exists and was saved with the same
   * `dim`. Arms that fail validation are dropped instead of being trusted.
   * Failures are logged, never thrown.
   */
  _load() {
    try {
      if (!fs.existsSync(STATE_PATH)) return;
      const raw = JSON.parse(fs.readFileSync(STATE_PATH, 'utf8'));
      if (!raw || raw.dim !== this.dim) {
        logger.debug({ savedDim: raw && raw.dim, dim: this.dim }, '[Bandit] Saved state ignored: dim mismatch');
        return;
      }
      let skipped = 0;
      for (const [k, v] of Object.entries(raw.arms || {})) {
        if (!this._isValidArm(v)) {
          skipped++;
          continue;
        }
        // A missing/invalid count would become NaN on the next `count++`.
        this.arms.set(k, { A: v.A, b: v.b, count: Number.isFinite(v.count) ? v.count : 0 });
      }
      this.steps = Number.isFinite(raw.steps) ? raw.steps : 0;
      if (skipped > 0) {
        logger.debug({ skipped }, '[Bandit] Dropped malformed arms from saved state');
      }
      logger.info({ arms: this.arms.size, steps: this.steps }, '[Bandit] State loaded');
    } catch (err) {
      logger.debug({ err: err.message }, '[Bandit] State load failed');
    }
  }

  /**
   * @returns {{ steps: number, arms: Object<string, { count: number }>, alpha: number }}
   *   total update count, per-arm update counts, and the current alpha
   */
  getStats() {
    const armStats = {};
    for (const [k, v] of this.arms) {
      armStats[k] = { count: v.count };
    }
    return { steps: this.steps, arms: armStats, alpha: this.alpha };
  }
}
// Process-wide bandit, created on the first getBandit() call.
let _instance = null;

/**
 * Return the shared LinUCBBandit, constructing it with default options the
 * first time it is requested.
 *
 * @returns {LinUCBBandit}
 */
function getBandit() {
  if (_instance) {
    return _instance;
  }
  _instance = new LinUCBBandit();
  return _instance;
}
// Public API: the bandit class, the accessor for the lazily created shared
// instance, and the default context feature dimension.
module.exports = { LinUCBBandit, getBandit, FEATURE_DIM };