@ai2070/l0
Version:
L0: The Missing Reliability Substrate for AI
238 lines • 8.84 kB
JavaScript
/**
 * Heuristic detector for "drift" in incrementally produced text.
 *
 * Each call to {@link DriftDetector#check} runs a set of independent
 * pattern/statistics based detectors over the accumulated content and reports
 * which of them fired. State that must survive between calls (entropy samples,
 * recent deltas, and the previously seen content) lives in `history`.
 */
export class DriftDetector {
  /** Effective settings: caller overrides merged with the defaults below. */
  config;
  /** Rolling state shared across successive `check` calls. */
  history;

  /**
   * @param {object} [config] - Optional overrides.
   * @param {boolean} [config.detectToneShift=true]
   * @param {boolean} [config.detectMetaCommentary=true]
   * @param {boolean} [config.detectRepetition=true]
   * @param {boolean} [config.detectEntropySpike=true]
   * @param {number} [config.repetitionThreshold=3] - Occurrences of one
   *   sentence or 5-word phrase needed to report repetition.
   * @param {number} [config.entropyThreshold=2.5] - Number of standard
   *   deviations above the mean that counts as an entropy spike.
   * @param {number} [config.entropyWindow=50] - Maximum number of entropy
   *   samples and deltas kept in history.
   */
  constructor(config = {}) {
    // A default parameter only covers `undefined`; tolerate an explicit null too.
    const options = config ?? {};
    this.config = {
      detectToneShift: options.detectToneShift ?? true,
      detectMetaCommentary: options.detectMetaCommentary ?? true,
      detectRepetition: options.detectRepetition ?? true,
      detectEntropySpike: options.detectEntropySpike ?? true,
      repetitionThreshold: options.repetitionThreshold ?? 3,
      entropyThreshold: options.entropyThreshold ?? 2.5,
      entropyWindow: options.entropyWindow ?? 50,
    };
    this.history = {
      entropy: [],
      tokens: [],
      lastContent: "",
    };
  }

  /**
   * Runs every enabled detector against `content` and records it as the
   * "previous content" for the next call.
   *
   * The format-collapse, markdown-collapse and hedging detectors have no
   * config switch and always run. The reported confidence is the highest
   * fixed weight among the detectors that fired (0 when none did).
   *
   * @param {string} content - The full text accumulated so far.
   * @param {string} [delta] - The newest piece of text; when truthy it is
   *   appended to the token history and sampled for entropy.
   * @returns {{detected: boolean, confidence: number, types: string[], details: string}}
   *   `details` is the per-detector messages joined with "; ".
   */
  check(content, delta) {
    const types = [];
    let confidence = 0;
    const details = [];
    if (delta) {
      this.history.tokens.push(delta);
      // Keep only the most recent `entropyWindow` deltas.
      if (this.history.tokens.length > this.config.entropyWindow) {
        this.history.tokens.shift();
      }
    }
    if (this.config.detectMetaCommentary) {
      const meta = this.detectMetaCommentary(content);
      if (meta) {
        types.push("meta_commentary");
        confidence = Math.max(confidence, 0.9);
        details.push("Meta commentary detected");
      }
    }
    if (this.config.detectToneShift) {
      const tone = this.detectToneShift(content, this.history.lastContent);
      if (tone) {
        types.push("tone_shift");
        confidence = Math.max(confidence, 0.7);
        details.push("Tone shift detected");
      }
    }
    if (this.config.detectRepetition) {
      const rep = this.detectRepetition(content);
      if (rep) {
        types.push("repetition");
        confidence = Math.max(confidence, 0.8);
        details.push("Excessive repetition detected");
      }
    }
    if (this.config.detectEntropySpike && delta) {
      const entropy = this.calculateEntropy(delta);
      this.history.entropy.push(entropy);
      if (this.history.entropy.length > this.config.entropyWindow) {
        this.history.entropy.shift();
      }
      if (this.detectEntropySpike()) {
        types.push("entropy_spike");
        confidence = Math.max(confidence, 0.6);
        details.push("Entropy spike detected");
      }
    }
    if (this.detectFormatCollapse(content)) {
      types.push("format_collapse");
      confidence = Math.max(confidence, 0.8);
      details.push("Format collapse detected");
    }
    if (this.detectMarkdownCollapse(content, this.history.lastContent)) {
      types.push("markdown_collapse");
      confidence = Math.max(confidence, 0.7);
      details.push("Markdown formatting collapse detected");
    }
    if (this.detectExcessiveHedging(content)) {
      types.push("hedging");
      confidence = Math.max(confidence, 0.5);
      details.push("Excessive hedging detected");
    }
    // Must happen after the comparisons above, which read the old value.
    this.history.lastContent = content;
    return {
      detected: types.length > 0,
      confidence,
      types,
      details: details.join("; "),
    };
  }

  /**
   * Reports whether the last 200 characters of `content` contain one of a
   * fixed list of self-referential / explanatory phrases (case-insensitive).
   *
   * @param {string} content
   * @returns {boolean}
   */
  detectMetaCommentary(content) {
    const metaPatterns = [
      /as an ai/i,
      /i'm an ai/i,
      /i am an ai/i,
      /i cannot actually/i,
      /i don't have personal/i,
      /i apologize, but i/i,
      /i'm sorry, but i/i,
      /let me explain/i,
      /to clarify/i,
      /in other words/i,
    ];
    const recent = content.slice(-200);
    return metaPatterns.some((pattern) => pattern.test(recent));
  }

  /**
   * Compares counts of formal and informal marker words in the last 200
   * characters of the current and previous content. A difference of more than
   * two in either category counts as a shift.
   *
   * @param {string} content
   * @param {string} previousContent - Ignored (returns false) when empty or
   *   shorter than 100 characters.
   * @returns {boolean}
   */
  detectToneShift(content, previousContent) {
    if (!previousContent || previousContent.length < 100) {
      return false;
    }
    const recentChunk = content.slice(-200);
    const previousChunk = previousContent.slice(-200);
    const formalMarkers = /\b(therefore|thus|hence|moreover|furthermore|consequently)\b/gi;
    const recentFormal = (recentChunk.match(formalMarkers) || []).length;
    const previousFormal = (previousChunk.match(formalMarkers) || []).length;
    const informalMarkers = /\b(gonna|wanna|yeah|yep|nope|ok|okay)\b/gi;
    const recentInformal = (recentChunk.match(informalMarkers) || []).length;
    const previousInformal = (previousChunk.match(informalMarkers) || [])
      .length;
    const formalShift = Math.abs(recentFormal - previousFormal) > 2;
    const informalShift = Math.abs(recentInformal - previousInformal) > 2;
    return formalShift || informalShift;
  }

  /**
   * Reports whether any sentence (longer than 20 characters, compared
   * trimmed and lower-cased) or any run of 5 whitespace-separated words
   * occurs at least `config.repetitionThreshold` times.
   *
   * Content with fewer than three qualifying sentences is never reported,
   * regardless of repeated phrases.
   *
   * @param {string} content
   * @returns {boolean}
   */
  detectRepetition(content) {
    const sentences = content
      .split(/[.!?]+/)
      .map((s) => s.trim().toLowerCase())
      .filter((s) => s.length > 20);
    if (sentences.length < 3) {
      return false;
    }
    const counts = new Map();
    for (const sentence of sentences) {
      counts.set(sentence, (counts.get(sentence) || 0) + 1);
    }
    for (const count of counts.values()) {
      if (count >= this.config.repetitionThreshold) {
        return true;
      }
    }
    const words = content.toLowerCase().split(/\s+/);
    const phrases = new Map();
    const windowSize = 5;
    // Inclusive upper bound so the final 5-word window is counted as well.
    for (let i = 0; i + windowSize <= words.length; i++) {
      const phrase = words.slice(i, i + windowSize).join(" ");
      phrases.set(phrase, (phrases.get(phrase) || 0) + 1);
    }
    for (const count of phrases.values()) {
      if (count >= this.config.repetitionThreshold) {
        return true;
      }
    }
    return false;
  }

  /**
   * Shannon entropy (in bits) of the character distribution of `text`.
   * Characters are Unicode code points, so a surrogate pair such as an emoji
   * counts as a single symbol.
   *
   * @param {string} text
   * @returns {number} 0 for empty or missing input.
   */
  calculateEntropy(text) {
    if (!text || text.length === 0) {
      return 0;
    }
    const frequencies = new Map();
    // Count symbols while iterating: `text.length` measures UTF-16 units and
    // would not match the code-point iteration for astral characters.
    let total = 0;
    for (const char of text) {
      frequencies.set(char, (frequencies.get(char) || 0) + 1);
      total += 1;
    }
    let entropy = 0;
    for (const count of frequencies.values()) {
      const probability = count / total;
      entropy -= probability * Math.log2(probability);
    }
    return entropy;
  }

  /**
   * Reports whether the newest entropy sample exceeds the mean of all stored
   * samples by more than `config.entropyThreshold` standard deviations.
   * Requires at least 10 samples; the newest sample is itself part of the
   * mean and (population) standard deviation.
   *
   * @returns {boolean}
   */
  detectEntropySpike() {
    if (this.history.entropy.length < 10) {
      return false;
    }
    const mean = this.history.entropy.reduce((a, b) => a + b, 0) /
      this.history.entropy.length;
    const variance = this.history.entropy.reduce((acc, val) => acc + Math.pow(val - mean, 2), 0) / this.history.entropy.length;
    const stdDev = Math.sqrt(variance);
    const last = this.history.entropy[this.history.entropy.length - 1] ?? 0;
    return last > mean + this.config.entropyThreshold * stdDev;
  }

  /**
   * Reports whether the first 100 characters of `content` match one of a
   * fixed list of introductory phrases (case-insensitive).
   *
   * @param {string} content
   * @returns {boolean}
   */
  detectFormatCollapse(content) {
    const collapsePatterns = [
      /here is the .+?:/i,
      /here's the .+?:/i,
      /let me .+? for you/i,
      /i'll .+? for you/i,
      /here you go/i,
    ];
    const beginning = content.slice(0, 100);
    return collapsePatterns.some((pattern) => pattern.test(beginning));
  }

  /**
   * Reports whether markdown constructs (code fences, headings, bold, links)
   * were present more than three times in the last 200 characters of the
   * previous content but are entirely absent from the last 200 characters of
   * the current content.
   *
   * @param {string} content
   * @param {string} previousContent - Ignored (returns false) when empty or
   *   shorter than 100 characters.
   * @returns {boolean}
   */
  detectMarkdownCollapse(content, previousContent) {
    if (!previousContent || previousContent.length < 100) {
      return false;
    }
    const markdownPatterns = [
      /```/g,
      /^#{1,6}\s/gm,
      /\*\*.*?\*\*/g,
      /\[.*?\]\(.*?\)/g,
    ];
    const recent = content.slice(-200);
    const previous = previousContent.slice(-200);
    let recentMarkdown = 0;
    let previousMarkdown = 0;
    for (const pattern of markdownPatterns) {
      recentMarkdown += (recent.match(pattern) || []).length;
      previousMarkdown += (previous.match(pattern) || []).length;
    }
    return previousMarkdown > 3 && recentMarkdown === 0;
  }

  /**
   * Reports whether the first line of the trimmed content consists solely of
   * "sure", "certainly", "of course" or "absolutely" (case-insensitive,
   * optional "!" and trailing whitespace).
   *
   * @param {string} content
   * @returns {boolean}
   */
  detectExcessiveHedging(content) {
    const hedgingPatterns = [
      /^sure!?\s*$/im,
      /^certainly!?\s*$/im,
      /^of course!?\s*$/im,
      /^absolutely!?\s*$/im,
    ];
    const firstLine = content.trim().split("\n")[0] ?? "";
    return hedgingPatterns.some((pattern) => pattern.test(firstLine));
  }

  /** Clears all rolling state; the configuration is left untouched. */
  reset() {
    this.history = {
      entropy: [],
      tokens: [],
      lastContent: "",
    };
  }

  /**
   * Returns a snapshot of the rolling state.
   *
   * @returns {{entropy: number[], tokens: string[], lastContent: string}}
   *   The arrays are copies, so mutating them does not affect the detector.
   */
  getHistory() {
    return {
      ...this.history,
      entropy: [...this.history.entropy],
      tokens: [...this.history.tokens],
    };
  }
}
/**
 * Factory counterpart to `new DriftDetector(config)`.
 *
 * @param {object} [config] - Passed through unchanged to the constructor.
 * @returns {DriftDetector} A freshly constructed detector.
 */
export function createDriftDetector(config) {
  const detector = new DriftDetector(config);
  return detector;
}
/**
 * One-shot drift check: builds a detector with default settings and runs a
 * single `check` on `content` with no delta.
 *
 * @param {string} content - Text to inspect.
 * @returns {object} The result object produced by `DriftDetector#check`.
 */
export function checkDrift(content) {
  return new DriftDetector().check(content);
}
//# sourceMappingURL=drift.js.map