UNPKG

markov-exa-mcp-server

Version:

A Model Context Protocol server with Exa for web search, academic paper search, and Twitter/X.com search. Provides real-time web searches with configurable tool selection, allowing users to enable or disable specific search capabilities. Supports customiz

611 lines (610 loc) 27.8 kB
import { z } from "zod";
import axios from "axios";
import { API_CONFIG } from "./config.js";
import { createRequestLogger } from "../utils/logger.js";
import * as fs from "fs/promises";
import * as path from "path";

// How long a completed research result is kept in the in-memory cache (1 hour).
const CACHE_TTL_MS = 60 * 60 * 1000;

// Serialized task responses longer than this many characters are summarized
// by exa_research_check_status instead of being returned in full.
const MAX_INLINE_RESPONSE_CHARS = 20000;

// Store for large research results: taskId -> { cachedAt, data }.
// The timestamp lives in a wrapper so the API payload itself is never mutated
// and the bookkeeping field cannot leak into tool output.
const researchResultsCache = new Map();

/**
 * Remove cache entries older than CACHE_TTL_MS (or lacking a timestamp).
 */
function cleanupCache() {
  const cutoff = Date.now() - CACHE_TTL_MS;
  for (const [key, entry] of researchResultsCache.entries()) {
    if (!entry.cachedAt || entry.cachedAt < cutoff) {
      researchResultsCache.delete(key);
    }
  }
}

/**
 * Store a task payload in the cache, evicting expired entries first.
 *
 * @param {string} taskId - Cache key.
 * @param {object} taskData - Task payload as returned by the research endpoint.
 */
function cacheTask(taskId, taskData) {
  cleanupCache();
  researchResultsCache.set(taskId, { cachedAt: Date.now(), data: taskData });
}

/**
 * Look up a task payload in the cache, ignoring (and dropping) expired entries.
 *
 * @param {string} taskId - Cache key.
 * @returns {object|undefined} The cached payload, or undefined on miss/expiry.
 */
function getCachedTask(taskId) {
  const entry = researchResultsCache.get(taskId);
  if (!entry) {
    return undefined;
  }
  if (!entry.cachedAt || entry.cachedAt < Date.now() - CACHE_TTL_MS) {
    researchResultsCache.delete(taskId);
    return undefined;
  }
  return entry.data;
}

/**
 * Build a per-request identifier of the form `<prefix>-<epoch ms>-<random suffix>`.
 *
 * @param {string} prefix - Tool-specific prefix.
 * @returns {string} The request id passed to the request logger.
 */
function createRequestId(prefix) {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`;
}

/**
 * Read the Exa API key from the environment.
 *
 * @returns {string|undefined} EXA_API_KEY, falling back to exaApiKey.
 */
function getApiKey() {
  return process.env.EXA_API_KEY || process.env.exaApiKey;
}

/**
 * Build the URL of a single research task.
 *
 * @param {string} taskId - Task id supplied by the tool caller.
 * @returns {string} Endpoint URL for that task.
 */
function taskUrl(taskId) {
  // The id comes from tool input; escape it so characters such as "/", "?"
  // or "#" cannot change which resource is requested.
  return `${API_CONFIG.RESEARCH_ENDPOINT}/${encodeURIComponent(taskId)}`;
}

/**
 * Wrap plain text in the tool result shape used throughout this module.
 *
 * @param {string} text - Text content of the result.
 * @param {boolean} [isError=false] - Whether to flag the result as an error.
 * @returns {object} Tool result object.
 */
function textResult(text, isError = false) {
  const result = { content: [{ type: "text", text }] };
  if (isError) {
    result.isError = true;
  }
  return result;
}

/**
 * Wrap a value, pretty-printed as JSON, in the tool result shape.
 *
 * @param {*} payload - Value to serialize.
 * @param {boolean} [isError=false] - Whether to flag the result as an error.
 * @returns {object} Tool result object.
 */
function jsonResult(payload, isError = false) {
  return textResult(JSON.stringify(payload, null, 2), isError);
}

/**
 * Log and build the error result returned when no API key is configured.
 *
 * @param {object} logger - Request logger.
 * @returns {object} Error tool result.
 */
function missingApiKeyResult(logger) {
  logger.error('No API key found');
  return textResult("EXA_API_KEY environment variable is not set", true);
}

/**
 * Log a caught error and convert it to an error tool result.
 *
 * @param {object} logger - Request logger.
 * @param {string} action - Verb phrase inserted after "Failed to ".
 * @param {*} error - The caught value.
 * @returns {object} Error tool result.
 */
function failureResult(logger, action, error) {
  logger.error(error);
  let detail;
  if (axios.isAxiosError(error)) {
    // Prefer the message from the API response body when there is one.
    detail = error.response?.data?.message || error.message;
  } else {
    detail = error instanceof Error ? error.message : String(error);
  }
  return textResult(`Failed to ${action}: ${detail}`, true);
}

/**
 * Get a task payload from the cache, or fetch it from the API on a miss.
 * Completed tasks fetched from the API are cached.
 *
 * @param {string} taskId - Task id to load.
 * @param {object} logger - Request logger.
 * @returns {Promise<{taskData?: object, errorResult?: object}>} Either the
 *   payload, or a ready-to-return error result when no API key is configured.
 *   Request failures propagate as rejections from axios.
 */
async function loadTask(taskId, logger) {
  const cached = getCachedTask(taskId);
  if (cached) {
    return { taskData: cached };
  }

  // The API key is only needed on a cache miss.
  const apiKey = getApiKey();
  if (!apiKey) {
    return { errorResult: missingApiKeyResult(logger) };
  }

  const url = taskUrl(taskId);
  logger.log(`Fetching from API: ${url}`);
  const response = await axios.get(url, {
    headers: {
      "x-api-key": apiKey,
    },
    timeout: API_CONFIG.REQUEST_TIMEOUT,
  });

  const taskData = response.data;
  if (taskData.status === 'completed') {
    cacheTask(taskId, taskData);
  }
  return { taskData };
}

/**
 * Count citations across all fields of a citations object.
 *
 * @param {object|undefined|null} citations - Map of field name to citation list.
 * @returns {number} Sum of the lengths of all array-valued entries.
 */
function countCitations(citations) {
  if (!citations) {
    return 0;
  }
  return Object.values(citations).reduce(
    (total, list) => total + (Array.isArray(list) ? list.length : 0),
    0
  );
}

/**
 * Convert a snake_case field name to Title Case words.
 *
 * @param {string} fieldName - e.g. "headless_options".
 * @returns {string} e.g. "Headless Options".
 */
function toTitleCase(fieldName) {
  return fieldName
    .split('_')
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ');
}

/**
 * Render a completed task as a markdown report.
 *
 * @param {object} taskData - Task payload (id, status, instructions, data, citations).
 * @param {object} options
 * @param {boolean} options.includeMetadata - Emit the title/metadata header.
 * @param {boolean} options.includeCitations - Emit per-field and combined citations.
 * @returns {string} Markdown document.
 */
function buildMarkdown(taskData, { includeMetadata, includeCitations }) {
  const parts = [];

  // Title and metadata
  if (includeMetadata) {
    parts.push(`# Research Report\n\n`);
    parts.push(`**Task ID:** ${taskData.id}\n`);
    parts.push(`**Generated:** ${new Date().toISOString()}\n`);
    parts.push(`**Status:** ${taskData.status}\n\n`);
    parts.push(`---\n\n`);
  }

  // Instructions
  parts.push(`## Research Instructions\n\n`);
  parts.push(`${taskData.instructions}\n\n`);
  parts.push(`---\n\n`);

  // Research results
  if (taskData.data) {
    parts.push(`## Research Results\n\n`);
    for (const [key, value] of Object.entries(taskData.data)) {
      const title = toTitleCase(key);
      parts.push(`### ${title}\n\n`);
      if (typeof value === 'string') {
        parts.push(`${value}\n\n`);
      } else {
        parts.push(`\`\`\`json\n${JSON.stringify(value, null, 2)}\n\`\`\`\n\n`);
      }

      // Add citations for this field if available
      const fieldCitations = includeCitations && taskData.citations
        ? taskData.citations[key]
        : undefined;
      if (Array.isArray(fieldCitations) && fieldCitations.length > 0) {
        parts.push(`#### Sources for ${title}\n\n`);
        fieldCitations.forEach((citation, index) => {
          parts.push(`${index + 1}. [${citation.title || 'Source'}](${citation.url})\n`);
          if (citation.snippet) {
            // Indented so the quote nests under its numbered list item.
            parts.push(`   > ${citation.snippet}\n\n`);
          }
        });
        parts.push('\n');
      }
    }
  }

  // All citations section
  if (includeCitations && taskData.citations) {
    const totalCitations = countCitations(taskData.citations);
    if (totalCitations > 0) {
      parts.push(`---\n\n`);
      parts.push(`## All Citations (${totalCitations} total)\n\n`);
      for (const [field, citations] of Object.entries(taskData.citations)) {
        if (Array.isArray(citations) && citations.length > 0) {
          parts.push(`### ${toTitleCase(field)} Sources\n\n`);
          citations.forEach((citation, index) => {
            parts.push(`- [${citation.title || `Source ${index + 1}`}](${citation.url})\n`);
          });
          parts.push('\n');
        }
      }
    }
  }

  return parts.join('');
}

/**
 * Build the result returned by the data/citations tools when the task has not
 * completed yet (or has failed).
 *
 * @param {object} taskData - Task payload.
 * @returns {object} Tool result describing the current status.
 */
function notCompletedResult(taskData) {
  return jsonResult({
    taskId: taskData.id,
    status: taskData.status,
    message: taskData.status === 'failed'
      ? taskData.error || 'Task failed'
      : 'Task not yet completed'
  });
}

/**
 * Register the Exa research-task tools on an MCP server:
 * exa_research_create_task, exa_research_check_status, exa_research_get_data,
 * exa_research_get_citations, exa_research_list_tasks and
 * exa_research_save_to_markdown.
 *
 * Every handler resolves to a tool result object and reports failures via
 * `isError: true` rather than throwing.
 *
 * @param {object} server - Server instance exposing `tool(name, description, schema, handler)`.
 */
export function registerResearchTaskTools(server) {
  // Tool to create a research task
  server.tool(
    "exa_research_create_task",
    "Create a research task using Exa AI's research endpoint. This will return a task ID that can be used to check status and retrieve results.",
    {
      instructions: z.string().describe("Research instructions describing what you want to research"),
      model: z.string().optional().describe("Research model to use (default: exa-research)"),
      outputSchema: z.any().optional().describe("JSON schema for structured output format"),
      inferSchema: z.boolean().optional().describe("Allow LLM to generate output schema automatically")
    },
    async ({ instructions, model, outputSchema, inferSchema }) => {
      const requestId = createRequestId('research_create');
      const logger = createRequestLogger(requestId, 'exa_research_create_task');
      try {
        const apiKey = getApiKey();
        if (!apiKey) {
          return missingApiKeyResult(logger);
        }

        // Only send the optional parts of the body that the caller supplied.
        const requestBody = {
          instructions,
          ...(model && { model }),
          ...(outputSchema || inferSchema !== undefined
            ? {
                output: {
                  ...(outputSchema && { schema: outputSchema }),
                  ...(inferSchema !== undefined && { inferSchema })
                }
              }
            : {})
        };

        logger.log(`Sending POST request to ${API_CONFIG.RESEARCH_ENDPOINT}`);
        logger.log(`Request body: ${JSON.stringify(requestBody, null, 2)}`);
        const response = await axios.post(API_CONFIG.RESEARCH_ENDPOINT, requestBody, {
          headers: {
            "x-api-key": apiKey,
            "Content-Type": "application/json",
          },
          timeout: API_CONFIG.REQUEST_TIMEOUT,
        });
        logger.log(`Response status: ${response.status}`);
        logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
        logger.complete();

        return jsonResult({
          taskId: response.data.id,
          message: `Research task created successfully. Task ID: ${response.data.id}. Use 'exa_research_check_status' to monitor progress.`
        });
      } catch (error) {
        return failureResult(logger, 'create research task', error);
      }
    }
  );

  // Tool to check research task status
  server.tool(
    "exa_research_check_status",
    "Check the status of a research task. If completed and response is large, use exa_research_get_data or exa_research_get_citations to retrieve specific parts.",
    {
      taskId: z.string().describe("The task ID returned from creating a research task")
    },
    async ({ taskId }) => {
      const requestId = createRequestId('research_status');
      const logger = createRequestLogger(requestId, 'exa_research_check_status');
      try {
        const apiKey = getApiKey();
        if (!apiKey) {
          return missingApiKeyResult(logger);
        }

        // Always hit the API here (no cache read) so the status is current.
        const url = taskUrl(taskId);
        logger.log(`Sending GET request to ${url}`);
        const response = await axios.get(url, {
          headers: {
            "x-api-key": apiKey,
          },
          timeout: API_CONFIG.REQUEST_TIMEOUT,
        });
        logger.log(`Response status: ${response.status}`);
        logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
        logger.complete();

        const task = response.data;
        if (task.status === 'failed') {
          return jsonResult({
            taskId: task.id,
            status: task.status,
            error: task.error || 'Task failed with no error message'
          }, true);
        }

        // Store the full response in cache for retrieval
        if (task.status === 'completed' && task.data) {
          cacheTask(taskId, task);
        }

        // Check response size
        const fullResponseText = JSON.stringify(task, null, 2);
        const responseSize = fullResponseText.length;

        // For large responses, return status info and instructions
        if (responseSize > MAX_INLINE_RESPONSE_CHARS) {
          return jsonResult({
            taskId: task.id,
            status: task.status,
            instructions: task.instructions,
            responseSize: responseSize,
            message: "Response is too large for single retrieval. Use the following tools:",
            availableTools: {
              "exa_research_get_data": "Retrieve the research data/results",
              "exa_research_get_citations": "Retrieve citations for specific fields"
            },
            dataFields: task.data ? Object.keys(task.data) : [],
            citationFields: task.citations ? Object.keys(task.citations) : [],
            totalCitations: countCitations(task.citations)
          });
        }

        // For smaller responses, return the full data
        return textResult(fullResponseText);
      } catch (error) {
        return failureResult(logger, 'check research task status', error);
      }
    }
  );

  // Tool to get research data
  server.tool(
    "exa_research_get_data",
    "Get the data/results from a completed research task. Use this when the full response is too large.",
    {
      taskId: z.string().describe("The task ID returned from creating a research task"),
      field: z.string().optional().describe("Specific data field to retrieve (e.g., 'headless_options'). If not specified, returns all data.")
    },
    async ({ taskId, field }) => {
      const requestId = createRequestId('research_data');
      const logger = createRequestLogger(requestId, 'exa_research_get_data');
      try {
        const { taskData, errorResult } = await loadTask(taskId, logger);
        if (errorResult) {
          return errorResult;
        }
        logger.log(`Task status: ${taskData.status}`);
        logger.complete();

        if (taskData.status !== 'completed') {
          return notCompletedResult(taskData);
        }

        // Return specific field or all data
        if (field && taskData.data) {
          return jsonResult({
            taskId: taskData.id,
            field: field,
            // `??` keeps legitimate falsy values (0, "", false) intact.
            data: taskData.data[field] ?? null
          });
        }
        return jsonResult({
          taskId: taskData.id,
          data: taskData.data || {}
        });
      } catch (error) {
        return failureResult(logger, 'get research data', error);
      }
    }
  );

  // Tool to get research citations
  server.tool(
    "exa_research_get_citations",
    "Get citations from a completed research task. Use this when you need the sources/references.",
    {
      taskId: z.string().describe("The task ID returned from creating a research task"),
      field: z.string().optional().describe("Specific field to get citations for. If not specified, returns all citations.")
    },
    async ({ taskId, field }) => {
      const requestId = createRequestId('research_citations');
      const logger = createRequestLogger(requestId, 'exa_research_get_citations');
      try {
        const { taskData, errorResult } = await loadTask(taskId, logger);
        if (errorResult) {
          return errorResult;
        }
        logger.log(`Task status: ${taskData.status}`);
        logger.complete();

        if (taskData.status !== 'completed') {
          return notCompletedResult(taskData);
        }

        // Return specific field citations or all
        if (field && taskData.citations) {
          return jsonResult({
            taskId: taskData.id,
            field: field,
            citations: taskData.citations[field] || [],
            count: taskData.citations[field]?.length || 0
          });
        }
        return jsonResult({
          taskId: taskData.id,
          citations: taskData.citations || {},
          totalCount: countCitations(taskData.citations)
        });
      } catch (error) {
        return failureResult(logger, 'get research citations', error);
      }
    }
  );

  // Tool to list all research tasks
  server.tool(
    "exa_research_list_tasks",
    "List all research tasks with their current status.",
    {
      cursor: z.string().optional().describe("Pagination cursor for fetching more results")
    },
    async ({ cursor }) => {
      const requestId = createRequestId('research_list');
      const logger = createRequestLogger(requestId, 'exa_research_list_tasks');
      try {
        const apiKey = getApiKey();
        if (!apiKey) {
          return missingApiKeyResult(logger);
        }

        const params = {};
        if (cursor) {
          params.cursor = cursor;
        }

        logger.log(`Sending GET request to ${API_CONFIG.RESEARCH_ENDPOINT}`);
        logger.log(`Request params: ${JSON.stringify(params, null, 2)}`);
        const response = await axios.get(API_CONFIG.RESEARCH_ENDPOINT, {
          headers: {
            "x-api-key": apiKey,
          },
          params,
          timeout: API_CONFIG.REQUEST_TIMEOUT,
        });
        logger.log(`Response status: ${response.status}`);
        logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
        logger.complete();

        return jsonResult({
          tasks: response.data.tasks,
          ...(response.data.cursor && { nextCursor: response.data.cursor })
        });
      } catch (error) {
        return failureResult(logger, 'list research tasks', error);
      }
    }
  );

  // Tool to save research results to markdown
  server.tool(
    "exa_research_save_to_markdown",
    "Save research task results to a markdown file with proper formatting",
    {
      taskId: z.string().describe("The task ID of the completed research task"),
      filePath: z.string().describe("Full path where to save the markdown file (e.g., /path/to/research.md)"),
      includeMetadata: z.boolean().optional().describe("Include metadata like task ID, timestamp, etc. (default: true)"),
      includeCitations: z.boolean().optional().describe("Include citations in the markdown (default: true)")
    },
    async ({ taskId, filePath, includeMetadata = true, includeCitations = true }) => {
      const requestId = createRequestId('research_save_md');
      const logger = createRequestLogger(requestId, 'exa_research_save_to_markdown');
      try {
        // First get the research data (from cache or API)
        const { taskData, errorResult } = await loadTask(taskId, logger);
        if (errorResult) {
          return errorResult;
        }

        if (taskData.status !== 'completed') {
          return textResult(
            `Cannot save research that is not completed. Current status: ${taskData.status}`,
            true
          );
        }

        const markdown = buildMarkdown(taskData, { includeMetadata, includeCitations });

        // Ensure directory exists
        const dir = path.dirname(filePath);
        await fs.mkdir(dir, { recursive: true });

        // Write the file
        await fs.writeFile(filePath, markdown, 'utf-8');
        logger.log(`Saved markdown to: ${filePath}`);
        logger.complete();

        // Get file stats
        const stats = await fs.stat(filePath);
        return jsonResult({
          success: true,
          filePath: filePath,
          fileSize: stats.size,
          message: `Research results saved to ${filePath}`,
          linesWritten: markdown.split('\n').length
        });
      } catch (error) {
        return failureResult(logger, 'save research to markdown', error);
      }
    }
  );
}