/*
 * lynkr
 * Version:
 * Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
 * 610 lines (540 loc) • 19.5 kB
 * JavaScript
 */
const pino = require("pino");
const path = require("path");
const fs = require("fs");
const { ContentDeduplicator } = require("./deduplicator");
/**
* LLM Audit Logger
*
* Dedicated logger for capturing LLM request/response audit trails.
* Logs to a separate file for easy parsing, searching, and compliance.
*
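* Log Entry Types:
* - llm_request: User messages sent to LLM providers
* - llm_response: LLM responses received from providers
* - llm_query_response_pair: A user query paired with its response, logged after the response
* - annotation lines (`_annotation: true`): Hash lookup hints, written only when annotations are enabled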
*
* Key Features:
* - Separate log file (llm-audit.log) for easy parsing
* - Correlation IDs to link requests with responses
* - Network destination tracking (IP, hostname, URL)
* - Content truncation to control log size
* - Async writes for minimal latency impact
* - Daily log rotation with configurable retention
*/
/**
 * Build the dedicated pino instance that writes the LLM audit trail.
 *
 * The directory that will contain `config.logFile` is created first when it
 * does not exist yet, then a pino logger bound to that file is returned.
 *
 * @param {Object} config - Audit configuration
 * @param {string} config.logFile - Path of the audit log file
 * @returns {Object} Pino logger instance
 */
function createAuditLogger(config) {
  // Make sure the directory holding the log file is present before opening it.
  const targetDir = path.dirname(config.logFile);
  const dirMissing = !fs.existsSync(targetDir);
  if (dirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  const loggerOptions = {
    // Fixed at "info" so audit entries are always emitted (compliance).
    level: "info",
    name: "llm-audit",
    // No pid/hostname fields, to keep each entry clean.
    base: null,
    timestamp: pino.stdTimeFunctions.isoTime,
    formatters: {
      // Emit the level as its label under the `level` key.
      level: (label) => ({ level: label }),
    },
  };

  // Asynchronous file destination so logging stays off the request's hot path.
  const sink = pino.destination({
    dest: config.logFile,
    sync: false,
    mkdir: true,
  });

  return pino(loggerOptions, sink);
}
/**
 * Truncate content whose string form exceeds a maximum length.
 *
 * Strings are measured as-is; arrays and non-null objects are measured by
 * their JSON serialization. When the limit is exceeded the returned `content`
 * is always a string: the first `maxLength` characters of the string form
 * followed by a "... [truncated, N chars omitted]" marker. Otherwise the
 * original value is returned untouched (same reference, same type).
 *
 * @param {string|Array|Object} content - Content to truncate
 * @param {number} maxLength - Maximum length. 0, a negative value, or anything
 *   that does not convert to a finite number (undefined, NaN, ...) disables
 *   truncation. Numeric strings are accepted.
 * @returns {Object} { content, truncated, originalLength } - `originalLength`
 *   is null when truncation is disabled or the content could not be measured.
 */
function truncateContent(content, maxLength) {
  const untouched = { content, truncated: false, originalLength: null };

  // Only a positive, finite limit can truncate. Without this guard an
  // undefined limit reached `substring(0, undefined)` and produced a bogus
  // "NaN chars omitted" marker, and a negative limit dropped all content.
  const limit = Number(maxLength);
  if (!Number.isFinite(limit) || limit <= 0) {
    return untouched;
  }

  // Obtain the string form used for measuring and truncating.
  let serialized;
  if (typeof content === "string") {
    serialized = content;
  } else if (typeof content === "object" && content !== null) {
    // Covers arrays and plain objects alike.
    try {
      serialized = JSON.stringify(content);
    } catch {
      // Cyclic structures / BigInt values cannot be serialized; audit logging
      // is best-effort, so hand the value back unchanged instead of throwing.
      return untouched;
    }
    if (typeof serialized !== "string") {
      // e.g. a toJSON() that yields undefined: nothing to measure.
      return untouched;
    }
  } else {
    // Numbers, booleans, null, undefined, ...: nothing to truncate.
    return untouched;
  }

  const originalLength = serialized.length;
  if (originalLength <= limit) {
    return { content, truncated: false, originalLength };
  }

  const omitted = originalLength - limit;
  return {
    content: `${serialized.substring(0, limit)}... [truncated, ${omitted} chars omitted]`,
    truncated: true,
    originalLength,
  };
}
/**
 * Hash content, then truncate it for audit logging.
 *
 * The hash is taken from the ORIGINAL content, before truncation, so it
 * always identifies the full payload. Falsy content short-circuits to an
 * all-null result without touching the deduplicator.
 *
 * @param {string|Array|Object} content - Content to hash and truncate
 * @param {number} maxLength - Maximum length for truncation (0 = no truncation)
 * @param {ContentDeduplicator} deduplicator - Deduplicator instance for hashing (hash is null when absent)
 * @returns {Object} { hash, content, truncated, originalLength }
 */
function hashAndTruncate(content, maxLength, deduplicator) {
  if (content) {
    // Hash first: the digest must describe the untruncated payload.
    let digest = null;
    if (deduplicator) {
      digest = deduplicator.hashContent(content);
    }

    // Only afterwards shorten the content for display.
    const shortened = truncateContent(content, maxLength);
    return {
      hash: digest,
      content: shortened.content,
      truncated: shortened.truncated,
      originalLength: shortened.originalLength,
    };
  }

  // Nothing to hash or truncate.
  return { hash: null, content: null, truncated: false, originalLength: null };
}
/**
 * Smart truncation for content that carries system-reminder blocks.
 *
 * Keeps the first `prefixLength` characters plus everything from the LAST
 * "</system-reminder>" tag onwards, joined by "...". Content is returned
 * unchanged when the tag is absent, when the tag starts inside the prefix, or
 * when fewer than 100 characters would be cut. Arrays and non-null objects
 * are examined through their JSON serialization; other non-string values are
 * passed through with a null `originalLength`.
 *
 * @param {string|Array|Object} content - Content to truncate
 * @param {number} prefixLength - Length of prefix to keep (default: 50)
 * @returns {Object} { content, truncated, originalLength, charsRemoved }
 */
function truncateSystemReminder(content, prefixLength = 50) {
  // Shared shape for every "leave it alone" outcome.
  const unchanged = (originalLength) => ({
    content,
    truncated: false,
    originalLength,
    charsRemoved: 0,
  });

  const isText = typeof content === "string";
  const isJsonLike = typeof content === "object" && content !== null; // arrays included
  if (!isText && !isJsonLike) {
    return unchanged(null);
  }

  const text = isText ? content : JSON.stringify(content);
  const originalLength = text.length;

  // Position of the LAST closing tag; everything from here on is kept.
  const lastTagAt = text.lastIndexOf("</system-reminder>");
  // Number of characters between the kept prefix and the kept suffix.
  const charsRemoved = lastTagAt - prefixLength;

  const tagMissing = lastTagAt === -1;
  const tagInsidePrefix = lastTagAt < prefixLength;
  const cutTooSmall = charsRemoved < 100;
  if (tagMissing || tagInsidePrefix || cutTooSmall) {
    return unchanged(originalLength);
  }

  const head = text.substring(0, prefixLength);
  const tail = text.substring(lastTagAt);
  return {
    content: `${head}...${tail}`,
    truncated: true,
    originalLength,
    charsRemoved,
  };
}
/**
 * Hash content, then apply the system-reminder smart truncation to it.
 *
 * The hash is computed from the ORIGINAL content so it identifies the full
 * payload regardless of what truncation removes. Falsy content yields an
 * all-null result and the deduplicator is not consulted.
 *
 * @param {string|Array|Object} content - Content to hash and truncate
 * @param {number} prefixLength - Length of prefix to keep (default: 50)
 * @param {ContentDeduplicator} deduplicator - Deduplicator instance for hashing (hash is null when absent)
 * @returns {Object} { hash, content, truncated, originalLength, charsRemoved }
 */
function hashAndTruncateSystemReminder(content, prefixLength = 50, deduplicator) {
  if (content) {
    // Digest of the untruncated payload comes first.
    let digest = null;
    if (deduplicator) {
      digest = deduplicator.hashContent(content);
    }

    // Then shorten using the reminder-aware strategy.
    const shortened = truncateSystemReminder(content, prefixLength);
    return {
      hash: digest,
      content: shortened.content,
      truncated: shortened.truncated,
      originalLength: shortened.originalLength,
      charsRemoved: shortened.charsRemoved,
    };
  }

  // Nothing to hash or truncate.
  return {
    hash: null,
    content: null,
    truncated: false,
    originalLength: null,
    charsRemoved: 0,
  };
}
/**
 * Extract hostname, port and protocol from a URL.
 *
 * The port is always reported as a number: the explicit port when the URL
 * carries one, otherwise 443 for "https:" and 80 for every other protocol.
 * (Previously an explicit port came back as a string while the defaults were
 * numbers, so the logged field had an inconsistent type.)
 *
 * @param {string} url - Full URL
 * @returns {Object} { hostname, port, protocol } - `protocol` has no trailing
 *   colon; all three are null when `url` cannot be parsed.
 */
function parseDestinationUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Missing or malformed URL: audit logging is best-effort, report unknowns.
    return { hostname: null, port: null, protocol: null };
  }

  // `URL#port` is "" when the URL uses the scheme's default port.
  const defaultPort = parsed.protocol === "https:" ? 443 : 80;
  const port = parsed.port === "" ? defaultPort : Number(parsed.port);

  return {
    hostname: parsed.hostname,
    port,
    protocol: parsed.protocol.replace(":", ""),
  };
}
/**
 * Create audit logger wrapper with convenience methods.
 *
 * When `config.enabled` is falsy a no-op object is returned that exposes the
 * same methods as the real logger, so callers may invoke any of them without
 * checking `enabled` first.
 *
 * @param {Object} config - Audit configuration from config.js
 * @param {boolean} config.enabled - Whether audit logging is active
 * @param {string} config.logFile - Audit log path (passed to createAuditLogger)
 * @param {number|Object} [config.maxContentLength] - Either one limit for all
 *   content, or { systemPrompt, userMessages, response }. Missing values
 *   default to 5000.
 * @param {Object} [config.deduplication] - Deduplication settings; used only
 *   when `deduplication.enabled` is truthy
 * @param {boolean} [config.annotations] - Emit hash annotation lines
 * @returns {Object} Audit logger interface
 */
function createAuditLoggerWrapper(config) {
  if (!config.enabled) {
    // No-op logger. It mirrors the full surface of the enabled logger; the
    // previous version omitted logQueryResponsePair/getDeduplicationStats,
    // which made those calls throw a TypeError whenever auditing was off.
    return {
      logLlmRequest: () => {},
      logLlmResponse: () => {},
      logQueryResponsePair: () => {},
      restoreLogEntry: (entry) => entry,
      getDeduplicationStats: () => null,
      enabled: false,
    };
  }

  const logger = createAuditLogger(config);

  // Support both legacy single value and new object format for maxContentLength.
  const DEFAULT_MAX_CONTENT_LENGTH = 5000;
  const configuredLimit = config.maxContentLength;
  // `typeof null === "object"`, so null has to be excluded explicitly;
  // otherwise a null setting crashed on the first property access.
  const hasPerFieldLimits = typeof configuredLimit === "object" && configuredLimit !== null;
  const limitFor = (field) =>
    hasPerFieldLimits
      ? (configuredLimit[field] ?? DEFAULT_MAX_CONTENT_LENGTH) // keeps an explicit 0
      : (configuredLimit || DEFAULT_MAX_CONTENT_LENGTH);
  const maxContentLength = {
    systemPrompt: limitFor("systemPrompt"),
    userMessages: limitFor("userMessages"),
    response: limitFor("response"),
  };

  // Initialize deduplicator if enabled
  const deduplicator = config.deduplication?.enabled
    ? new ContentDeduplicator(config.deduplication.dictionaryPath, {
        minSize: config.deduplication.minSize,
        cacheSize: config.deduplication.cacheSize,
        sanitize: config.deduplication.sanitize,
        sessionCache: config.deduplication.sessionCache,
      })
    : null;

  /**
   * Log hash annotation line for easy lookup
   * @private
   * @param {Object} hashes - Hash values to annotate
   */
  function logHashAnnotation(hashes) {
    if (!config.annotations) {
      return; // Skip if annotations disabled
    }
    const annotationEntry = {
      _annotation: true,
      lookup: "Use: node scripts/audit-log-reader.js --hash <hash>",
    };
    // Add any provided hashes
    if (hashes.systemPromptHash) {
      annotationEntry.systemPromptHash = hashes.systemPromptHash;
    }
    if (hashes.userMessagesHash) {
      annotationEntry.userMessagesHash = hashes.userMessagesHash;
    }
    if (hashes.userQueryHash) {
      annotationEntry.userQueryHash = hashes.userQueryHash;
    }
    logger.info(annotationEntry);
  }

  /**
   * Decide what to write for a piece of content under session-level
   * deduplication. The content is always stored in the dictionary; the first
   * occurrence in a session logs the (truncated) content, later occurrences
   * log whatever storeContentWithHash returns (a reference, per the original
   * implementation's comments).
   * @private
   * @param {*} original - Untruncated content
   * @param {Object} hashed - Result of hashAndTruncate* for `original`
   * @returns {*} Value to place in the log entry
   */
  function resolveLoggedContent(original, hashed) {
    if (!deduplicator || !original || !deduplicator.shouldDeduplicate(original)) {
      return hashed.content;
    }
    // Ask about first-time status BEFORE storing, as the original code did.
    const isFirstTime = deduplicator.isFirstTimeInSession(hashed.hash);
    const reference = deduplicator.storeContentWithHash(original, hashed.hash);
    return isFirstTime ? hashed.content : reference;
  }

  /**
   * Build the token-usage section of a log entry.
   * @private
   * @param {number} [requestTokens]
   * @param {number} [responseTokens]
   * @returns {Object|null} Usage object, or null when neither count is set
   */
  function buildUsage(requestTokens, responseTokens) {
    if (!requestTokens && !responseTokens) {
      return null;
    }
    return {
      requestTokens: requestTokens || null,
      responseTokens: responseTokens || null,
      totalTokens: (requestTokens || 0) + (responseTokens || 0),
    };
  }

  return {
    /**
     * Log LLM request (user message sent to provider)
     * @param {Object} context - Request context
     */
    logLlmRequest(context) {
      const {
        correlationId,
        sessionId,
        provider,
        model,
        stream,
        destinationUrl,
        userMessages,
        systemPrompt,
        tools,
        maxTokens,
      } = context;
      const { hostname, port, protocol } = parseDestinationUrl(destinationUrl);

      // Hash BEFORE truncate - this ensures we track the original content.
      // Use specific max lengths for different content types.
      const hashedMessages = hashAndTruncate(userMessages, maxContentLength.userMessages, deduplicator);
      const hashedSystem = systemPrompt
        ? hashAndTruncate(systemPrompt, maxContentLength.systemPrompt, deduplicator)
        : { hash: null, content: null, truncated: false };

      // Session-level deduplication (no-op when the deduplicator is disabled).
      const finalUserMessages = resolveLoggedContent(userMessages, hashedMessages);
      const finalSystemPrompt = resolveLoggedContent(systemPrompt, hashedSystem);

      const logEntry = {
        type: "llm_request",
        correlationId,
        sessionId,
        provider,
        model,
        stream: stream || false,
        destinationUrl,
        destinationHostname: hostname,
        destinationPort: port,
        protocol,
        userMessages: finalUserMessages,
        systemPrompt: finalSystemPrompt,
        tools: Array.isArray(tools) ? tools : null,
        maxTokens: maxTokens || null,
        contentTruncated: hashedMessages.truncated || hashedSystem.truncated,
        msg: "LLM request initiated",
      };
      // Add original length indicators if truncated
      if (hashedMessages.truncated) {
        logEntry.userMessagesOriginalLength = hashedMessages.originalLength;
      }
      if (hashedSystem.truncated) {
        logEntry.systemPromptOriginalLength = hashedSystem.originalLength;
      }
      logger.info(logEntry);

      // Log hash annotation for easy lookup
      logHashAnnotation({
        userMessagesHash: hashedMessages.hash,
        systemPromptHash: hashedSystem.hash,
      });
    },

    /**
     * Log LLM response (response received from provider)
     * @param {Object} context - Response context
     */
    logLlmResponse(context) {
      const {
        correlationId,
        sessionId,
        provider,
        model,
        stream,
        destinationUrl,
        destinationHostname,
        destinationIp,
        destinationIpFamily,
        assistantMessage,
        stopReason,
        requestTokens,
        responseTokens,
        latencyMs,
        status,
        error,
        streamingNote,
      } = context;
      const { hostname, port, protocol } = parseDestinationUrl(destinationUrl);

      // Truncate response content if needed (but not for streaming)
      const hasLoggableMessage = Boolean(assistantMessage) && !stream;
      const truncatedMessage = hasLoggableMessage
        ? truncateContent(assistantMessage, maxContentLength.response)
        : { content: null, truncated: false };

      const logEntry = {
        type: "llm_response",
        correlationId,
        sessionId,
        provider,
        model,
        stream: stream || false,
        destinationUrl,
        // A hostname supplied by the caller wins over the one parsed from the URL.
        destinationHostname: destinationHostname || hostname,
        destinationPort: port,
        destinationIp: destinationIp || null,
        destinationIpFamily: destinationIpFamily || null,
        protocol,
        status: status || null,
        latencyMs: latencyMs || null,
        msg: error ? "LLM request failed" : "LLM response received",
      };

      // Add response content for non-streaming
      if (hasLoggableMessage) {
        logEntry.assistantMessage = truncatedMessage.content;
        logEntry.stopReason = stopReason || null;
        logEntry.contentTruncated = truncatedMessage.truncated;
        if (truncatedMessage.truncated) {
          logEntry.assistantMessageOriginalLength = truncatedMessage.originalLength;
        }
      }

      // Add streaming note if applicable
      if (stream && streamingNote) {
        logEntry.streamingNote = streamingNote;
      }

      // Add token usage
      const usage = buildUsage(requestTokens, responseTokens);
      if (usage) {
        logEntry.usage = usage;
      }

      // Add error details if present (accepts a string or an Error-like object)
      if (error) {
        logEntry.error = typeof error === "string" ? error : error.message || "Unknown error";
        logEntry.errorStack = error.stack || null;
      }
      logger.info(logEntry);
    },

    /**
     * Log a query-response pair for easy query/response correlation.
     * The user query goes through system-reminder smart truncation and
     * deduplication; the assistant response is logged in full.
     * This is logged AFTER the response.
     * @param {Object} context - Query-response context
     */
    logQueryResponsePair(context) {
      const {
        correlationId,
        sessionId,
        provider,
        model,
        requestTime,
        responseTime,
        userQuery,
        assistantResponse,
        stopReason,
        latencyMs,
        requestTokens,
        responseTokens,
      } = context;

      // Hash BEFORE truncate - apply smart truncation to userQuery
      const hashedQuery = hashAndTruncateSystemReminder(userQuery, 50, deduplicator);
      // Session-level deduplication (no-op when the deduplicator is disabled).
      const finalUserQuery = resolveLoggedContent(userQuery, hashedQuery);

      const logEntry = {
        type: "llm_query_response_pair",
        correlationId,
        sessionId,
        provider,
        model,
        requestTime,
        responseTime,
        latencyMs: latencyMs || null,
        userQuery: finalUserQuery, // Smart truncation + deduplication applied
        assistantResponse, // Full response, NO truncation or deduplication (usually unique)
        stopReason: stopReason || null,
        msg: "Query-response pair (full content)",
      };

      // Add truncation metadata if truncation occurred
      if (hashedQuery.truncated) {
        logEntry.userQueryTruncated = true;
        logEntry.userQueryOriginalLength = hashedQuery.originalLength;
        logEntry.userQueryCharsRemoved = hashedQuery.charsRemoved;
      }

      // Add token usage if available
      const usage = buildUsage(requestTokens, responseTokens);
      if (usage) {
        logEntry.usage = usage;
      }
      logger.info(logEntry);

      // Log hash annotation for easy lookup
      logHashAnnotation({
        userQueryHash: hashedQuery.hash,
      });
    },

    /**
     * Restore full content from hash references in a log entry
     * @param {Object} entry - Log entry with potential hash references
     * @returns {Object} Entry with full content restored (the entry itself
     *   when deduplication is disabled)
     */
    restoreLogEntry(entry) {
      return deduplicator ? deduplicator.restoreEntry(entry) : entry;
    },

    /**
     * Get deduplication statistics
     * @returns {Object|null} Statistics or null if deduplication disabled
     */
    getDeduplicationStats() {
      return deduplicator ? deduplicator.getStats() : null;
    },

    enabled: true,
  };
}
// Public API of this module.
module.exports = {
// Exported under the name `createAuditLogger`, but bound to the wrapper
// factory (createAuditLoggerWrapper); the function named createAuditLogger
// in this file is not exported.
createAuditLogger: createAuditLoggerWrapper,
// Helper functions exported alongside the factory.
truncateContent,
truncateSystemReminder,
hashAndTruncate,
hashAndTruncateSystemReminder,
parseDestinationUrl,
};