/**
 * @dooor-ai/toolkit
 * Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
 * (npm listing metadata: 189 lines (163 loc) • 5.32 kB, text/typescript)
 */
import { Eval } from "./base";
import { EvalResult, EvalConfig } from "../core/types";
import { getCortexDBClient, getGlobalProviderName } from "../observability/cortexdb-client";
export interface RoleAdherenceConfig extends EvalConfig {
  /** The role/persona the LLM is expected to maintain (e.g. "professional financial advisor"). */
  expectedRole?: string;
  /** Optional system prompt; when no explicit role is given, the judge extracts the role from it. */
  systemPrompt?: string;
}
/**
* RoleAdherenceEval - Measures if LLM stays in its assigned role/persona
*
* Evaluates whether the response is consistent with the expected role
* (e.g., "helpful assistant", "pirate captain", "technical support", etc.)
*
* Example:
* ```typescript
 * const roleEval = new RoleAdherenceEval({
 *   threshold: 0.8,
 *   expectedRole: "professional financial advisor"
 * });
 * const result = await roleEval.evaluate(
 *   "Should I invest in crypto?",
 *   "Yolo bro! Just buy whatever coin is trending on TikTok!"
 * );
 * // result.score = 0.1 (breaks role), result.passed = false
* ```
*/
export class RoleAdherenceEval extends Eval {
  private expectedRole?: string;
  private systemPrompt?: string;

  constructor(config: RoleAdherenceConfig = {}) {
    super(config);
    this.expectedRole = config.expectedRole;
    this.systemPrompt = config.systemPrompt;
  }

  get name(): string {
    return "RoleAdherenceEval";
  }

  /**
   * Set expected role dynamically (overrides the constructor config).
   */
  setExpectedRole(role: string): void {
    this.expectedRole = role;
  }

  /**
   * Set system prompt dynamically (overrides the constructor config).
   */
  setSystemPrompt(prompt: string): void {
    this.systemPrompt = prompt;
  }

  /**
   * Judge whether `output` stays in the configured role/persona.
   *
   * Role resolution order: constructor/setter `expectedRole`, then
   * `metadata.expectedRole`, then `metadata.role`; the system prompt
   * (constructor/setter, then `metadata.systemPrompt`) is the fallback
   * source the judge extracts the role from.
   *
   * @param input    The user message sent to the LLM.
   * @param output   The LLM response being evaluated.
   * @param metadata Optional per-call overrides (`expectedRole`, `role`, `systemPrompt`).
   * @returns An EvalResult; on misconfiguration or judge failure the score is a
   *          neutral 0.5 with `passed: false` (errors are reported, never thrown).
   */
  async evaluate(
    input: string,
    output: string,
    metadata?: Record<string, any>
  ): Promise<EvalResult> {
    const startTime = Date.now();
    const role = this.expectedRole || metadata?.expectedRole || metadata?.role;
    const systemPrompt = this.systemPrompt || metadata?.systemPrompt;

    // Without a role or a system prompt there is nothing to judge against.
    if (!role && !systemPrompt) {
      return {
        name: this.name,
        score: 0.5,
        passed: false,
        details: "No role specified for adherence evaluation. Pass 'expectedRole' or 'systemPrompt' via config or metadata.",
        metadata: {
          latency: Date.now() - startTime,
        },
        timestamp: new Date(),
      };
    }

    try {
      const cortexClient = getCortexDBClient();
      const providerName = getGlobalProviderName();
      const prompt = this.buildPrompt(role, systemPrompt, input, output);

      // temperature 0 for deterministic judging; 300 tokens is enough for the JSON verdict.
      const response = await cortexClient.invokeAI({
        prompt,
        usage: "evaluation",
        providerName: providerName || undefined,
        temperature: 0.0,
        maxTokens: 300,
      });

      const score = this.parseScore(response.text);
      const passed = score >= this.getThreshold();

      return {
        name: this.name,
        score,
        passed,
        details: `Role adherence score: ${score.toFixed(2)}. ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`,
        metadata: {
          latency: Date.now() - startTime,
          judgeResponse: response.text,
          roleChecked: role || "extracted from system prompt",
        },
        timestamp: new Date(),
      };
    } catch (error) {
      // Evals must not throw into the caller's pipeline; degrade to a neutral failing result.
      console.error("RoleAdherenceEval failed:", error);
      return {
        name: this.name,
        score: 0.5,
        passed: false,
        details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`,
        metadata: {
          error: String(error),
          latency: Date.now() - startTime,
        },
        timestamp: new Date(),
      };
    }
  }

  /**
   * Build the LLM-as-judge prompt. When an explicit role is present it is used
   * directly; otherwise the judge is given the raw system prompt to infer the
   * role from. Asks for a strict JSON verdict so parseScore can extract it.
   */
  private buildPrompt(
    role: string | undefined,
    systemPrompt: string | undefined,
    userInput: string,
    assistantOutput: string
  ): string {
    const roleDescription = role
      ? `Expected Role: "${role}"`
      : `System Prompt: """
${systemPrompt}
"""`;

    return `You are an expert evaluator. Your task is to assess if the response adheres to the ASSIGNED ROLE/PERSONA.
${roleDescription}
User Input: "${userInput}"
Assistant Response: "${assistantOutput}"
Evaluate role adherence:
- 1.0 = Perfect adherence, response fully consistent with role
- 0.7-0.9 = Good adherence, mostly in character with minor slips
- 0.4-0.6 = Partial adherence, noticeable out-of-character moments
- 0.0-0.3 = Poor adherence, breaks character or contradicts role
Consider:
- Tone and language style
- Expertise level and domain knowledge
- Behavioral patterns and personality
- Constraints defined in the role
Output ONLY a JSON object in this exact format:
{
  "score": 0.9,
  "reasoning": "Assessment of how well the response matches the expected role"
}`;
  }

  /**
   * Extract a score in [0, 1] from the judge's response.
   *
   * Tries, in order: a JSON object with a numeric `score` field, then a bare
   * decimal in the text. Non-finite numbers (NaN/Infinity survive the
   * `typeof === "number"` check) are rejected so they cannot poison the
   * result; all failures fall back to a neutral 0.5.
   */
  private parseScore(response: string): number {
    try {
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const parsed = JSON.parse(jsonMatch[0]);
        // Guard against NaN/Infinity: typeof NaN === "number", and
        // Math.min/Math.max propagate NaN instead of clamping it.
        if (typeof parsed.score === "number" && Number.isFinite(parsed.score)) {
          return Math.max(0, Math.min(1, parsed.score));
        }
      }

      // Fallback: first bare number in the text (e.g. "Score: 0.8").
      const numberMatch = response.match(/\b0?\.\d+\b|\b1\.0\b|\b[01]\b/);
      if (numberMatch) {
        return Math.max(0, Math.min(1, parseFloat(numberMatch[0])));
      }

      console.warn("Could not parse score from response:", response);
      return 0.5;
    } catch (error) {
      console.error("Error parsing score:", error);
      return 0.5;
    }
  }
}