@ai2070/l0
Version:
L0: The Missing Reliability Substrate for AI
272 lines • 8.8 kB
JavaScript
export const BAD_PATTERNS = {
META_COMMENTARY: [
/as an ai language model/i,
/as an ai assistant/i,
/i'm an ai/i,
/i am an ai/i,
/i don't have personal/i,
/i cannot actually/i,
/i apologize, but i/i,
/i'm sorry, but i/i,
],
EXCESSIVE_HEDGING: [
/^sure!?\s*$/im,
/^certainly!?\s*$/im,
/^of course!?\s*$/im,
/^absolutely!?\s*$/im,
],
REFUSAL: [
/i cannot provide/i,
/i'm not able to/i,
/i can't assist with/i,
/i'm unable to/i,
/that would be inappropriate/i,
],
INSTRUCTION_LEAK: [
/\[system\]/i,
/\[user\]/i,
/\[assistant\]/i,
/<\|im_start\|>/i,
/<\|im_end\|>/i,
/###\s*instruction/i,
/###\s*system/i,
],
PLACEHOLDERS: [
/\[insert .+?\]/i,
/\[todo:?\]/i,
/\[placeholder\]/i,
/\[your .+? here\]/i,
/\{\{.+?\}\}/,
],
FORMAT_COLLAPSE: [
/here is the .+?:/i,
/here's the .+?:/i,
/let me .+? for you/i,
/i'll .+? for you/i,
],
};
export function findBadPatterns(content, patterns) {
const matches = [];
for (const pattern of patterns) {
const match = content.match(pattern);
if (match) {
matches.push({
pattern,
match: match[0],
index: match.index ?? 0,
});
}
}
return matches;
}
export function detectMetaCommentary(context) {
const { content } = context;
const violations = [];
const matches = findBadPatterns(content, BAD_PATTERNS.META_COMMENTARY);
for (const match of matches) {
violations.push({
rule: "pattern-meta-commentary",
message: `Meta commentary detected: "${match.match}"`,
severity: "error",
position: match.index,
recoverable: true,
suggestion: "Retry generation without meta commentary",
});
}
return violations;
}
export function detectExcessiveHedging(context) {
const { content } = context;
const violations = [];
const firstLine = content.trim().split("\n")[0] ?? "";
const matches = findBadPatterns(firstLine, BAD_PATTERNS.EXCESSIVE_HEDGING);
if (matches.length > 0 && matches[0]) {
violations.push({
rule: "pattern-hedging",
message: `Excessive hedging at start: "${matches[0].match}"`,
severity: "warning",
position: matches[0].index,
recoverable: true,
suggestion: "Content should start directly without hedging",
});
}
return violations;
}
export function detectRefusal(context) {
const { content } = context;
const violations = [];
const matches = findBadPatterns(content, BAD_PATTERNS.REFUSAL);
for (const match of matches) {
violations.push({
rule: "pattern-refusal",
message: `Refusal pattern detected: "${match.match}"`,
severity: "error",
position: match.index,
recoverable: false,
suggestion: "Model refused to complete the task",
});
}
return violations;
}
export function detectInstructionLeakage(context) {
const { content } = context;
const violations = [];
const matches = findBadPatterns(content, BAD_PATTERNS.INSTRUCTION_LEAK);
for (const match of matches) {
violations.push({
rule: "pattern-instruction-leak",
message: `Instruction leakage detected: "${match.match}"`,
severity: "error",
position: match.index,
recoverable: true,
suggestion: "Retry generation without system tokens",
});
}
return violations;
}
export function detectPlaceholders(context) {
const { content, completed } = context;
const violations = [];
if (!completed) {
return violations;
}
const matches = findBadPatterns(content, BAD_PATTERNS.PLACEHOLDERS);
for (const match of matches) {
violations.push({
rule: "pattern-placeholders",
message: `Placeholder detected: "${match.match}"`,
severity: "error",
position: match.index,
recoverable: true,
suggestion: "Output contains incomplete placeholders",
});
}
return violations;
}
export function detectFormatCollapse(context) {
const { content } = context;
const violations = [];
const firstLines = content.split("\n").slice(0, 3).join("\n");
const matches = findBadPatterns(firstLines, BAD_PATTERNS.FORMAT_COLLAPSE);
if (matches.length > 0 && matches[0]) {
violations.push({
rule: "pattern-format-collapse",
message: `Format collapse detected: "${matches[0].match}"`,
severity: "warning",
position: matches[0].index,
recoverable: true,
suggestion: "Output should not mix meta-instructions with content",
});
}
return violations;
}
export function detectRepetition(context, threshold = 2) {
const { content, completed } = context;
const violations = [];
if (!completed) {
return violations;
}
const sentences = content
.split(/[.!?]+/)
.map((s) => s.trim().toLowerCase())
.filter((s) => s.length > 20);
const counts = new Map();
for (const sentence of sentences) {
counts.set(sentence, (counts.get(sentence) || 0) + 1);
}
for (const [sentence, count] of counts.entries()) {
if (count > threshold) {
violations.push({
rule: "pattern-repetition",
message: `Sentence repeated ${count} times: "${sentence.slice(0, 50)}..."`,
severity: "error",
recoverable: true,
suggestion: "Content contains repeated sentences",
});
}
}
return violations;
}
export function detectFirstLastDuplicate(context) {
const { content, completed } = context;
const violations = [];
if (!completed || content.length < 100) {
return violations;
}
const sentences = content
.split(/[.!?]+/)
.map((s) => s.trim())
.filter((s) => s.length > 10);
if (sentences.length < 2) {
return violations;
}
const first = sentences[0].toLowerCase();
const last = sentences[sentences.length - 1].toLowerCase();
if (first === last) {
violations.push({
rule: "pattern-first-last-duplicate",
message: "First and last sentences are identical",
severity: "error",
recoverable: true,
suggestion: "Retry generation - possible loop detected",
});
}
return violations;
}
export function patternRule(_config) {
return {
name: "pattern-detection",
description: "Detects known bad patterns in model output",
streaming: false,
severity: "error",
recoverable: true,
check: (context) => {
const violations = [];
violations.push(...detectMetaCommentary(context));
violations.push(...detectExcessiveHedging(context));
violations.push(...detectRefusal(context));
violations.push(...detectInstructionLeakage(context));
violations.push(...detectPlaceholders(context));
violations.push(...detectFormatCollapse(context));
violations.push(...detectRepetition(context));
violations.push(...detectFirstLastDuplicate(context));
return violations;
},
};
}
export function customPatternRule(patterns, message = "Custom pattern detected", severity = "error") {
return {
name: "pattern-custom",
description: "Custom pattern matching",
streaming: false,
severity,
recoverable: severity !== "fatal",
check: (context) => {
const violations = [];
const matches = findBadPatterns(context.content, patterns);
for (const match of matches) {
violations.push({
rule: "pattern-custom",
message: `${message}: "${match.match}"`,
severity,
position: match.index,
recoverable: severity !== "fatal",
});
}
return violations;
},
};
}
export class PatternGuardrail {
rule;
constructor(config) {
this.rule = patternRule(config);
}
check(context) {
return this.rule.check(context);
}
get name() {
return this.rule.name;
}
}
//# sourceMappingURL=patterns.js.map