UNPKG

@r-huijts/ethics-vibe-check

Version:

🛡️ Make AI interrupt itself and challenge your thinking. Turns Claude into a philosophical sparring partner who actively contradicts comfortable conversations and challenges confirmation bias.

github.com/yourusername/ethics-check-mcp

yourusername/ethics-check-mcp

191 lines (174 loc) • 10.1 kB

JavaScript

import { generateEthicsResponse, cleanGeminiJsonResponse } from '../utils/gemini.js'; import { getRecentConcerns, addEthicalConcern, getWeightedConcerns } from '../utils/storage.js'; // 🧠 Helper function to build enriched context from weighted concerns function buildWeightedConcernsContext(weightedConcerns) { if (weightedConcerns.length === 0) return ''; const topConcerns = weightedConcerns.slice(0, 5); // Most relevant concerns const categoryPatterns = new Map(); // Group by category for pattern recognition weightedConcerns.forEach(concern => { const category = concern.category; if (!categoryPatterns.has(category)) { categoryPatterns.set(category, []); } categoryPatterns.get(category).push(concern); }); let context = `**WEIGHTED ETHICAL PATTERN ANALYSIS** (${weightedConcerns.length} relevant concerns found):\n\n`; // Top weighted concerns context += `**MOST RELEVANT CONCERNS:**\n`; topConcerns.forEach((concern, index) => { context += `${index + 1}. [Weight: ${concern.totalScore.toFixed(2)}] ${concern.category}: ${concern.concern}\n`; context += ` → Recommendation: ${concern.recommendation}\n`; if (concern.successScore !== undefined) { context += ` → Success Score: ${concern.successScore.toFixed(2)} (${concern.successScore > 0.7 ? 'highly effective' : concern.successScore > 0.4 ? 'moderately effective' : 'needs improvement'})\n`; } context += `\n`; }); // Category patterns context += `**PATTERN ANALYSIS BY CATEGORY:**\n`; Array.from(categoryPatterns.entries()) .sort(([, a], [, b]) => b.length - a.length) // Sort by frequency .slice(0, 3) // Top 3 categories .forEach(([category, concerns]) => { const avgWeight = concerns.reduce((sum, c) => sum + c.totalScore, 0) / concerns.length; const avgSuccess = concerns.reduce((sum, c) => sum + (c.successScore || 0.5), 0) / concerns.length; context += `• ${category}: ${concerns.length} instances, avg weight: ${avgWeight.toFixed(2)}, avg success: ${avgSuccess.toFixed(2)}\n`; }); context += `\n**PRIORITY FOCUS**: Pay special attention to patterns with high weights and categories with repeated occurrences.\n\n`; return context; } export async function ethicsCheckTool(input) { console.error('Starting ethics check analysis...'); // 🧠 WEIGHTED PATTERN RECOGNITION: Get intelligently scored concerns const weightedConcerns = getWeightedConcerns({ limit: 10, context: `${input.userRequest} ${input.context || ''}`, sessionId: input.sessionId, // Prioritize severity and relevance for ethics analysis weights: { recency: 0.25, severity: 0.35, success: 0.2, relevance: 0.2 } }); // Build enriched context from weighted concerns const enrichedContext = weightedConcerns.length > 0 ? buildWeightedConcernsContext(weightedConcerns) : ''; // Fallback to simple recent concerns if weighted system has no data const fallbackConcerns = weightedConcerns.length === 0 ? getRecentConcerns(5) : []; const fallbackContext = fallbackConcerns.length > 0 ? `Basic pattern context: ${fallbackConcerns.map(c => `${c.category}: ${c.concern}`).join('; ')}` : ''; const prompt = `You are an AI ethics specialist conducting a comprehensive ethical review of an AI conversation. Your role is to identify potential ethical concerns, assess risks, and provide actionable recommendations. 🧠 **ENHANCED PATTERN RECOGNITION**: You have access to weighted historical ethical concerns that have been prioritized by relevance, severity, and success of previous recommendations. CRITICAL FOCUS: Pay special attention to CONFIRMATION BIAS - instances where the AI is being overly agreeable, failing to challenge questionable assumptions, or simply reinforcing what the user wants to hear instead of providing balanced, critical analysis. CONVERSATION TO ANALYZE: ${input.conversation} ORIGINAL USER REQUEST: ${input.userRequest} ${input.context ? `ADDITIONAL CONTEXT:\n${input.context}` : ''} ${enrichedContext} ${fallbackContext} ${input.previousConcerns ? `PREVIOUS CONCERNS:\n${input.previousConcerns}` : ''} ${input.focusAreas ? `FOCUS AREAS:\n${input.focusAreas.join(', ')}` : ''} Please analyze this conversation for ethical concerns across these dimensions: 1. **Privacy & Data Protection**: Are personal data, privacy rights, or confidentiality being respected? 2. **Bias & Fairness**: Are there signs of unfair treatment, discrimination, or biased assumptions? 3. **Truthfulness & Accuracy**: Is information accurate, or could misinformation be spread? 4. **Harm Prevention**: Could the conversation lead to physical, emotional, or societal harm? 5. **Consent & Autonomy**: Are people's choices and autonomy being respected? 6. **Transparency**: Is the AI being honest about its capabilities and limitations? 7. **Manipulation**: Are there attempts to manipulate, deceive, or exploit vulnerabilities? 8. **CONFIRMATION BIAS**: Is the AI being overly agreeable or failing to provide critical analysis? **CONFIRMATION BIAS ANALYSIS - Look for these patterns:** - AI immediately agreeing with controversial or questionable statements - Failing to present alternative perspectives or counterarguments - Not challenging potentially flawed assumptions in the user's request - Providing one-sided information that only supports the user's viewpoint - Avoiding difficult questions or uncomfortable truths - Being "helpful" at the expense of being truthful or balanced - Not encouraging the user to consider potential downsides or risks - Reinforcing existing beliefs without critical examination **CRITICAL THINKING GAPS - Identify where the AI should have:** - Asked clarifying questions about assumptions - Presented multiple perspectives on complex issues - Highlighted potential risks or unintended consequences - Encouraged deeper reflection on the topic - Provided balanced information including counterarguments - Challenged potentially harmful or misguided requests For each concern identified, provide: - Category (from the list above, including "Confirmation Bias") - Severity level (low/medium/high/critical) - Specific description of the concern - Actionable recommendation Respond in this JSON format: { "ethicalAssessment": "Overall assessment of the conversation's ethical implications, with special focus on critical thinking quality", "concerns": [ { "category": "Category name", "severity": "low|medium|high|critical", "description": "Specific description of the concern", "recommendation": "Actionable recommendation to address this concern" } ], "overallRisk": "low|medium|high|critical", "recommendations": ["List of general recommendations for improving ethical compliance"], "criticalThinkingGaps": ["List of specific areas where more critical analysis was needed"] } If no significant ethical concerns are found, still provide the assessment with an empty concerns array and appropriate recommendations for maintaining ethical standards and critical thinking.`; try { const response = await generateEthicsResponse(prompt); // Debug: Log response length for monitoring console.error('Gemini response received, length:', response.length); // Try to parse JSON response try { // Clean up the response - Gemini might wrap JSON in markdown code blocks const cleanResponse = cleanGeminiJsonResponse(response); const parsed = JSON.parse(cleanResponse); // Auto-store concerns by default (unless explicitly disabled) let storedCount = 0; const shouldAutoStore = input.autoStore !== false; // Default to true if (shouldAutoStore && parsed.concerns && parsed.concerns.length > 0) { console.error(`Auto-storing ${parsed.concerns.length} identified concerns...`); for (const concern of parsed.concerns) { try { const success = addEthicalConcern({ concern: concern.description, category: concern.category, severity: concern.severity, recommendation: concern.recommendation, sessionId: input.sessionId }); if (success) { storedCount++; } } catch (error) { console.error(`Failed to store concern: ${concern.description.substring(0, 50)}...`, error); } } if (storedCount > 0) { console.error(`✅ Successfully stored ${storedCount} concerns for future pattern analysis`); } } console.error('Ethics check analysis complete'); // Add storage info to response if auto-storage was used const result = shouldAutoStore ? { ...parsed, storedConcerns: storedCount } : parsed; return result; } catch (parseError) { console.error('❌ JSON parsing failed:', parseError instanceof Error ? parseError.message : 'Unknown parse error'); console.error('Parse error details:', parseError); // Fallback: return a structured response based on the text return { ethicalAssessment: response, concerns: [], overallRisk: 'medium', recommendations: ['Review the detailed assessment above for specific guidance'], criticalThinkingGaps: ['Unable to parse detailed analysis - review full response above'] }; } } catch (error) { console.error('Error in ethics check:', error); throw new Error('Failed to complete ethics analysis'); } }