lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
179 lines (155 loc) • 5.44 kB
JavaScript
const logger = require("../logger");
// Encoder function taken from @toon-format/toon. Stays `undefined` until the
// module-level dynamic import settles; afterwards it is the encode function,
// or `null` when loading failed or no encode() export was found.
let cachedEncode;
// Error recorded when the encoder could not be obtained; `null` after a
// successful load, `undefined` while the import is still pending.
let cachedLoadError;
// Latch that keeps the "encoder dependency is unavailable" warning from being
// logged more than once.
let warnedMissingDependency = false;
/**
 * Normalise raw TOON compression settings into a fully populated config.
 *
 * @param {object|null} [settings] - Raw settings. `undefined` and `null` are
 *   both treated as an empty object.
 * @param {boolean} [settings.enabled] - Compression is enabled only when this
 *   is strictly `true`.
 * @param {number|string} [settings.minBytes] - Size threshold. Numbers are used
 *   as-is; anything else is parsed as a base-10 integer. Non-finite or
 *   non-positive results fall back to 4096.
 * @param {boolean} [settings.failOpen] - `true` unless strictly `false`.
 * @param {boolean} [settings.logStats] - `true` unless strictly `false`.
 * @returns {{enabled: boolean, minBytes: number, failOpen: boolean, logStats: boolean}}
 */
function normaliseSettings(settings = {}) {
  const DEFAULT_MIN_BYTES = 4096;

  // A default parameter only replaces `undefined`. An explicit `null` would
  // otherwise throw on the first property read below.
  const source = settings ?? {};

  const minBytesRaw =
    typeof source.minBytes === "number"
      ? source.minBytes
      : Number.parseInt(source.minBytes ?? String(DEFAULT_MIN_BYTES), 10);
  const hasUsableMinBytes = Number.isFinite(minBytesRaw) && minBytesRaw > 0;

  return {
    enabled: source.enabled === true,
    minBytes: hasUsableMinBytes ? minBytesRaw : DEFAULT_MIN_BYTES,
    failOpen: source.failOpen !== false,
    logStats: source.logStats !== false,
  };
}
/**
 * Choose the encoder for the current call.
 *
 * @param {*} overrideEncode - Caller-supplied encoder; used only when it is a
 *   function.
 * @returns {Function|null} The override, else the module-level cached encoder,
 *   else `null` when no encoder is available.
 */
function resolveEncodeFn(overrideEncode) {
  const hasOverride = typeof overrideEncode === "function";
  // `cachedEncode` is filled in asynchronously by the module-level dynamic
  // import. While that import is still pending it is `undefined`, which is
  // reported as `null` so the caller takes its encoder-unavailable path.
  return hasOverride ? overrideEncode : (cachedEncode ?? null);
}
// @toon-format/toon is ESM-only; dynamic import() is available in CommonJS
// modules. The load is started while this module is evaluated so the encoder
// is ready before the first request arrives.
void (async () => {
  try {
    const toonModule = await import("@toon-format/toon");
    // Accept either a named `encode` export or one hanging off the default export.
    const candidate = toonModule?.encode ?? toonModule?.default?.encode ?? null;
    if (typeof candidate === "function") {
      cachedEncode = candidate;
      cachedLoadError = null;
    } else {
      cachedEncode = null;
      cachedLoadError = new Error("Missing encode() export from @toon-format/toon");
    }
  } catch (err) {
    // Record the failure instead of throwing; it is read later when a request
    // finds no encoder available.
    cachedEncode = null;
    cachedLoadError = err;
  }
})();
/**
 * Cheap pre-check for whether a string could be a JSON object or array.
 * Only the outermost delimiters are inspected after trimming whitespace;
 * no parsing is attempted.
 *
 * @param {*} text - Value to inspect.
 * @returns {boolean} `true` when `text` is a string wrapped in `{...}` or `[...]`.
 */
function looksLikeJsonObjectOrArray(text) {
  if (typeof text !== "string") {
    return false;
  }

  const candidate = text.trim();
  if (candidate.length < 2) {
    return false;
  }

  const first = candidate[0];
  const last = candidate[candidate.length - 1];
  const isObjectShaped = first === "{" && last === "}";
  const isArrayShaped = first === "[" && last === "]";
  return isObjectShaped || isArrayShaped;
}
/**
 * Parse JSON without throwing.
 *
 * @param {*} text - Value handed to `JSON.parse`.
 * @returns {*} The parsed value, or `null` when parsing throws. A literal JSON
 *   `null` therefore cannot be told apart from a parse failure.
 */
function safeJsonParse(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Invalid JSON is an expected outcome here; the null sentinel is returned.
  }
  return parsed;
}
/**
 * Run the encoder and coerce its result to a single string.
 *
 * @param {Function} encodeFn - Encoder invoked with `value`.
 * @param {*} value - Data to encode.
 * @returns {string} The encoder's string result as-is; for an iterable result,
 *   its items joined with newlines; otherwise the empty string.
 */
function toToonString(encodeFn, value) {
  const result = encodeFn(value);
  if (typeof result === "string") {
    return result;
  }

  // Some encoders yield output line by line; stitch those lines together.
  const isIterable = Boolean(result) && typeof result[Symbol.iterator] === "function";
  return isIterable ? [...result].join("\n") : "";
}
/**
 * Replace a JSON-looking string with its TOON encoding when that saves space.
 *
 * The original `content` is returned unchanged when any of these hold:
 *  - it is not a string;
 *  - its UTF-8 size is below `cfg.minBytes` (counts `stats.skippedBySize`);
 *  - it is not wrapped in `{}` / `[]` (counts `stats.skippedByShape`);
 *  - it does not parse to an object or array (counts `stats.skippedByParse`);
 *  - the encoder yields no usable text;
 *  - the encoded text is not strictly smaller than the original.
 * The last two cases update no skip counter; they show up as the gap between
 * `candidateCount` and the other counters.
 *
 * @param {*} content - Candidate text.
 * @param {{minBytes: number}} cfg - Normalised settings; only `minBytes` is read.
 * @param {Function} encodeFn - TOON encoder.
 * @param {object} stats - Mutable counters, updated in place.
 * @returns {*} The TOON text on success, otherwise `content` unchanged.
 */
function compressStringContent(content, cfg, encodeFn, stats) {
  if (typeof content !== "string") return content;

  const originalBytes = Buffer.byteLength(content, "utf8");
  if (originalBytes < cfg.minBytes) {
    stats.skippedBySize += 1;
    return content;
  }

  // Everything past the size gate counts as a candidate, converted or not.
  stats.candidateCount += 1;

  if (!looksLikeJsonObjectOrArray(content)) {
    stats.skippedByShape += 1;
    return content;
  }

  const parsed = safeJsonParse(content);
  if (!parsed || typeof parsed !== "object") {
    stats.skippedByParse += 1;
    return content;
  }

  const toonText = toToonString(encodeFn, parsed);
  if (typeof toonText !== "string" || toonText.trim().length === 0) {
    return content;
  }

  const compressedBytes = Buffer.byteLength(toonText, "utf8");
  // TOON is not always smaller than the JSON it replaces (nested or
  // non-uniform data can grow). Swapping in a larger encoding would inflate
  // the prompt and record negative savings, so keep the original.
  if (compressedBytes >= originalBytes) {
    return content;
  }

  stats.convertedCount += 1;
  stats.originalBytes += originalBytes;
  stats.compressedBytes += compressedBytes;
  return toonText;
}
/**
 * Compress JSON-looking text in a chat payload's messages using TOON.
 *
 * Messages are rewritten in place: string `content`, plus `text` blocks and
 * `input_text` blocks inside array `content`. Messages with `role === "tool"`
 * and every other block type are left untouched.
 *
 * @param {object} payload - Request payload; only `payload.messages` is read
 *   and mutated.
 * @param {object|null} [settings] - Raw settings, passed through
 *   `normaliseSettings`.
 * @param {object|null} [options] - Per-call options.
 * @param {Function} [options.encode] - Encoder override, used instead of the
 *   cached module encoder.
 * @returns {{payload: object, stats: object}} The same `payload` reference and
 *   the counters collected for this call.
 * @throws {Error} Only when `failOpen` is `false`: if no encoder is available,
 *   or if compressing a message throws.
 */
function applyToonCompression(payload, settings = {}, options = {}) {
  // Default parameters do not cover an explicit `null`, so guard both inputs.
  const cfg = normaliseSettings(settings ?? {});
  const stats = {
    enabled: cfg.enabled,
    available: true,
    convertedCount: 0,
    candidateCount: 0,
    skippedBySize: 0,
    skippedByShape: 0,
    skippedByParse: 0,
    failureCount: 0,
    originalBytes: 0,
    compressedBytes: 0,
  };

  if (!cfg.enabled) return { payload, stats };
  if (!payload || !Array.isArray(payload.messages) || payload.messages.length === 0) {
    return { payload, stats };
  }

  const encodeFn = resolveEncodeFn(options?.encode);
  if (typeof encodeFn !== "function") {
    stats.available = false;
    const err = cachedLoadError ?? new Error("TOON encoder unavailable");
    if (!cfg.failOpen) throw err;
    if (!warnedMissingDependency) {
      logger.warn(
        { error: err.message },
        "TOON enabled but encoder dependency is unavailable; falling back to JSON",
      );
      warnedMissingDependency = true;
    }
    return { payload, stats };
  }

  const compress = (text) => compressStringContent(text, cfg, encodeFn, stats);

  for (const message of payload.messages) {
    if (!message || typeof message !== "object") continue;
    if (message.role === "tool") continue; // Never mutate machine-executed protocol payloads

    try {
      if (typeof message.content === "string") {
        message.content = compress(message.content);
        continue;
      }
      if (!Array.isArray(message.content)) continue;

      for (const block of message.content) {
        if (!block || typeof block !== "object") continue;

        // Keep protocol blocks untouched. Only compress user-language text fields.
        if (block.type === "text") {
          if (typeof block.text === "string") block.text = compress(block.text);
          continue;
        }

        if (block.type === "input_text") {
          // OpenAI Responses-style `input_text` parts carry their string in
          // `text`; a field literally named `input_text` is still honoured
          // for payloads that use that shape.
          if (typeof block.text === "string") block.text = compress(block.text);
          if (typeof block.input_text === "string") block.input_text = compress(block.input_text);
        }
      }
    } catch (err) {
      // One bad message must not take the request down in fail-open mode;
      // the failure is reported through stats.failureCount.
      stats.failureCount += 1;
      if (!cfg.failOpen) throw err;
    }
  }

  if (cfg.logStats && stats.convertedCount > 0) {
    logger.info(
      {
        convertedCount: stats.convertedCount,
        candidateCount: stats.candidateCount,
        originalBytes: stats.originalBytes,
        compressedBytes: stats.compressedBytes,
      },
      "TOON compression applied to message context",
    );
  }

  return { payload, stats };
}
// Only applyToonCompression is exported from this module.
module.exports = {
applyToonCompression,
};