UNPKG

@mettamatt/code-reasoning

Version:

Enhanced MCP server for code reasoning using sequential thinking methodology, optimized for programming tasks

github.com/mettamatt/code-reasoning

mettamatt/code-reasoning

145 lines (131 loc) • 5.7 kB

JavaScript

import { getActivePrompt, SYSTEM_PROMPT } from './core-prompts.js';

// Fallbacks used when the corresponding environment variable is unset or unparsable.
const DEFAULT_MODEL = 'claude-3-7-sonnet-20250219';
const DEFAULT_MAX_TOKENS = 4000;
const DEFAULT_TEMPERATURE = 0.2;

/**
 * Read MAX_TOKENS from the environment as a positive base-10 integer.
 * Falls back to DEFAULT_MAX_TOKENS so that NaN or a non-positive value is
 * never forwarded to the API.
 *
 * @returns {number}
 */
function maxTokensFromEnv() {
  const parsed = Number.parseInt(process.env.MAX_TOKENS || '4000', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOKENS;
}

/**
 * Read TEMPERATURE from the environment as a finite number.
 * Falls back to DEFAULT_TEMPERATURE when the value does not parse.
 *
 * @returns {number}
 */
function temperatureFromEnv() {
  const parsed = Number.parseFloat(process.env.TEMPERATURE || '0.2');
  return Number.isFinite(parsed) ? parsed : DEFAULT_TEMPERATURE;
}

/**
 * Lazily load the Anthropic SDK and build a client for the given key.
 * Callers invoke this inside their own try block so that a failing import
 * is reported through their `{ success: false }` result.
 *
 * @param {string} apiKey
 * @returns {Promise<object>} Anthropic client instance.
 */
async function createClient(apiKey) {
  const { Anthropic } = await import('@anthropic-ai/sdk');
  return new Anthropic({ apiKey });
}

/**
 * Concatenate the text of every `text` content block in a messages response.
 * Reading only the first block would drop the answer whenever a non-text
 * block precedes it.
 *
 * @param {object} resp - Response returned by `client.messages.create`.
 * @returns {string} Joined text, or '' when there are no text blocks.
 */
function collectText(resp) {
  const blocks = Array.isArray(resp?.content) ? resp.content : [];
  return blocks
    .filter((block) => block?.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text)
    .join('');
}

/**
 * Turn any thrown value into a printable message.
 *
 * @param {unknown} error
 * @returns {string}
 */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Create a formatted prompt.
 *
 * Wraps the scenario text with the active tool description (the `prompt`
 * property returned by `getActivePrompt()`) and the JSON formatting rules.
 *
 * @param {string} scenarioPrompt - Problem statement to embed in the prompt.
 * @returns {string} The complete user prompt.
 */
export function createPrompt(scenarioPrompt) {
  const { prompt: toolDescription } = getActivePrompt();

  // The prompt text is runtime data; it is kept verbatim on a single line.
  return `I'd like you to solve a problem using sequential thinking methodology. Break down your reasoning into explicit steps. Here is the code reasoning tool description that explains the format to use: ${toolDescription} CRITICAL FORMATTING INSTRUCTIONS: For each thought, you MUST output a valid JSON object with EXACTLY these properties: 1. "thought" (string) – your current reasoning step 2. "thought_number" (integer ≥ 1) 3. "total_thoughts" (integer ≥ 1) – estimated final count 4. "next_thought_needed" (boolean) Optional properties: - "is_revision" (boolean) - "revises_thought" (integer) - "branch_from_thought" (integer) - "branch_id" (string) - "needs_more_thoughts" (boolean) Please solve the following problem using this sequential thinking format: ${scenarioPrompt} REMEMBER: Each thought MUST be a valid JSON object containing at minimum the exact fields listed above.`;
}

/**
 * Extract thought records from API response.
 *
 * Candidate JSON chunks are either the object inside a ``` fence (optionally
 * tagged `json`) or a bare brace-delimited span containing
 * "next_thought_needed". A chunk is kept only if it parses as JSON and has
 * the four required fields with the expected primitive types.
 *
 * @param {string} text - Raw response text.
 * @returns {object[]} Valid records sorted by ascending `thought_number`.
 */
export function extractThoughtRecords(text) {
  const JSON_PATTERN = /```(?:json)?\s*({[\s\S]*?})\s*```|({[\s\S]*?"next_thought_needed"[\s\S]*?})/g;
  const records = [];

  for (const match of text.matchAll(JSON_PATTERN)) {
    // Group 1 is the fenced form, group 2 the bare form.
    const jsonChunk = match[1] || match[2];
    if (!jsonChunk) continue;

    let data;
    try {
      data = JSON.parse(jsonChunk);
    } catch {
      // Skip invalid chunks: a regex candidate that is not valid JSON is expected noise.
      continue;
    }

    const hasRequiredFields =
      typeof data.thought === 'string' &&
      typeof data.thought_number === 'number' &&
      typeof data.total_thoughts === 'number' &&
      typeof data.next_thought_needed === 'boolean';
    if (hasRequiredFields) {
      records.push(data);
    }
  }

  return records.sort((a, b) => a.thought_number - b.thought_number);
}

/**
 * Call the Anthropic API with the scenario prompt and parse the thought chain.
 *
 * Defaults come from CLAUDE_MODEL, MAX_TOKENS and TEMPERATURE in the
 * environment; entries in `options` override them.
 *
 * @param {string} apiKey - Anthropic API key.
 * @param {string} scenarioPrompt - Problem statement passed to `createPrompt`.
 * @param {{model?: string, maxTokens?: number, temperature?: number}} [options]
 * @returns {Promise<{success: true, thoughtChain: object[], rawResponse: string}
 *   | {success: false, error: string}>} Never rejects; failures are reported
 *   through `success: false`.
 */
export async function callAPI(apiKey, scenarioPrompt, options = {}) {
  const defaultOptions = {
    model: process.env.CLAUDE_MODEL || DEFAULT_MODEL,
    maxTokens: maxTokensFromEnv(),
    temperature: temperatureFromEnv(),
  };
  const cfg = { ...defaultOptions, ...options };

  try {
    const client = await createClient(apiKey);
    const resp = await client.messages.create({
      model: cfg.model,
      max_tokens: cfg.maxTokens,
      temperature: cfg.temperature,
      system: SYSTEM_PROMPT,
      messages: [{ role: 'user', content: createPrompt(scenarioPrompt) }],
    });

    const raw = collectText(resp);
    const thoughtChain = extractThoughtRecords(raw);
    return { success: true, thoughtChain, rawResponse: raw };
  } catch (error) {
    return { success: false, error: describeError(error) };
  }
}

/**
 * Evaluate solution quality by asking the model to grade a thought chain.
 *
 * @param {string} apiKey - Anthropic API key.
 * @param {{problem: string, difficulty: string}} scenario - Fields
 *   interpolated into the grading prompt.
 * @param {Array<{thought: string}>} thoughtChain - Thoughts whose text forms
 *   the solution under evaluation.
 * @param {{model?: string, temperature?: number}} [options] - Overrides for
 *   the default model (CLAUDE_MODEL or the built-in default) and temperature (0.2).
 * @returns {Promise<{success: true, qualityScore: number, justification: *}
 *   | {success: false, error: string}>} Never rejects; failures are reported
 *   through `success: false`.
 */
export async function evaluateQuality(apiKey, scenario, thoughtChain, options = {}) {
  const defaultOptions = {
    model: process.env.CLAUDE_MODEL || DEFAULT_MODEL,
    temperature: DEFAULT_TEMPERATURE,
  };
  const cfg = { ...defaultOptions, ...options };

  try {
    const client = await createClient(apiKey);

    // Prompt text is runtime data and is reproduced verbatim, including the
    // line break that precedes "Output your evaluation".
    const prompt = `You are a RIGOROUS, CRITICAL evaluator assessing the quality of a solution to a coding or system design problem. Be exceptionally demanding in your evaluation. PROBLEM: ${scenario.problem} DIFFICULTY LEVEL: ${scenario.difficulty} SOLUTION TO EVALUATE: ${thoughtChain.map(t => t.thought).join('\n\n')} SCORING GUIDELINES: - 90-100%: EXCEPTIONAL solution - virtually flawless, demonstrates expert-level understanding - 75-89%: STRONG solution - minor improvements possible, but well-executed - 60-74%: ADEQUATE solution - correctly solves the problem but with inefficiencies - 40-59%: FLAWED solution - partially solves the problem but with significant issues - 0-39%: POOR solution - fundamental misunderstandings or critical failures BE HARSH AND CRITICAL. Reserve high scores (>85%) ONLY for truly exceptional solutions. 
Output your evaluation as a valid JSON object with EXACTLY this structure: { "qualityScore": 67, "justification": "Concise 1-2 line explanation focusing on key strengths/weaknesses." }`;

    const resp = await client.messages.create({
      model: cfg.model,
      max_tokens: 1000,
      temperature: cfg.temperature,
      system: 'You are an expert, highly critical evaluator of solution quality with exceptionally high standards. Be rigorous and strict in your assessment. Return valid JSON only.',
      messages: [{ role: 'user', content: prompt }],
    });

    const text = collectText(resp);

    // Greedy match: from the first "{" to the last "}" in the reply.
    const json = text.match(/\{[\s\S]*}/)?.[0];
    if (!json) throw new Error('No JSON found in response');

    const evaluation = JSON.parse(json);
    // Number.isFinite also rejects non-numbers and overflowed literals such as 1e999.
    if (!Number.isFinite(evaluation.qualityScore)) {
      throw new Error('Invalid quality evaluation: missing qualityScore');
    }

    return {
      success: true,
      qualityScore: evaluation.qualityScore,
      justification: evaluation.justification,
    };
  } catch (error) {
    return {
      success: false,
      error: describeError(error),
    };
  }
}