markov-exa-mcp-server
Version:
A Model Context Protocol server with Exa for web search, academic paper search, and Twitter/X.com search. Provides real-time web searches with configurable tool selection, allowing users to enable or disable specific search capabilities. Supports customiz
611 lines (610 loc) • 27.8 kB
JavaScript
import { z } from "zod";
import axios from "axios";
import { API_CONFIG } from "./config.js";
import { createRequestLogger } from "../utils/logger.js";
import * as fs from "fs/promises";
import * as path from "path";
// In-memory store of research task payloads, keyed by task ID.
const researchResultsCache = new Map();

/**
 * Evicts cached research results that are more than one hour old.
 * Entries without a truthy `_cachedAt` timestamp are evicted as well.
 */
function cleanupCache() {
  const cutoff = Date.now() - 60 * 60 * 1000;
  researchResultsCache.forEach((entry, taskId) => {
    const isStale = !entry._cachedAt || entry._cachedAt < cutoff;
    if (isStale) {
      // Deleting during Map iteration is safe in JavaScript.
      researchResultsCache.delete(taskId);
    }
  });
}
/** Builds a per-request identifier: `<prefix>-<epoch ms>-<short random base36 suffix>`. */
function buildRequestId(prefix) {
  return `${prefix}-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`;
}

/** Wraps plain text in a tool-result envelope; `isError` is only present on failures. */
function textResult(text, isError = false) {
  return {
    content: [{ type: "text", text }],
    ...(isError && { isError: true }),
  };
}

/** Pretty-prints `payload` as JSON (2-space indent) inside a tool result. */
function jsonResult(payload, isError = false) {
  return textResult(JSON.stringify(payload, null, 2), isError);
}

/** Reads the Exa API key from `EXA_API_KEY`, falling back to `exaApiKey`. */
function readApiKey() {
  return process.env.EXA_API_KEY || process.env.exaApiKey;
}

/** Logs and returns the standard "missing API key" error result. */
function missingApiKeyResult(logger) {
  logger.error('No API key found');
  return textResult("EXA_API_KEY environment variable is not set", true);
}

/** Logs `error` and converts it into an error result reading "Failed to <action>: <detail>". */
function failureResult(logger, action, error) {
  logger.error(error);
  let detail;
  if (axios.isAxiosError(error)) {
    // Prefer the message from the API response body when there is one.
    detail = error.response?.data?.message || error.message;
  } else {
    detail = error instanceof Error ? error.message : String(error);
  }
  return textResult(`Failed to ${action}: ${detail}`, true);
}

/** Builds the per-task endpoint URL; the ID is encoded so it cannot alter the path or query. */
function researchTaskUrl(taskId) {
  return `${API_CONFIG.RESEARCH_ENDPOINT}/${encodeURIComponent(taskId)}`;
}

/** Returns `record[key]` only when it is an own property (never an inherited one). */
function ownValue(record, key) {
  return Object.prototype.hasOwnProperty.call(record, key) ? record[key] : undefined;
}

/** Sums the lengths of all array-valued entries of a citations map (0 when absent). */
function countCitations(citations) {
  if (!citations) {
    return 0;
  }
  return Object.values(citations).reduce((acc, cits) => acc + (Array.isArray(cits) ? cits.length : 0), 0);
}

/** Converts a snake_case field name to "Title Case". */
function toTitleCase(fieldName) {
  return fieldName.split('_').map((word) => word.charAt(0).toUpperCase() + word.slice(1)).join(' ');
}

/**
 * Stores a task payload in the results cache after evicting old entries.
 * A shallow copy is stored so the `_cachedAt` bookkeeping field never leaks
 * into the object the caller goes on to serialize.
 */
function cacheCompletedTask(taskId, taskData) {
  cleanupCache();
  researchResultsCache.set(taskId, { ...taskData, _cachedAt: Date.now() });
}

/**
 * Loads a research task from the cache, or from the API on a cache miss.
 * Tasks fetched with status `completed` are cached.
 *
 * @returns {Promise<{taskData?: object, errorResult?: object}>} `errorResult` is set
 *   (and `taskData` is not) when no API key is configured.
 */
async function loadTask(taskId, logger) {
  const cached = researchResultsCache.get(taskId);
  if (cached) {
    return { taskData: cached };
  }
  const apiKey = readApiKey();
  if (!apiKey) {
    return { errorResult: missingApiKeyResult(logger) };
  }
  const url = researchTaskUrl(taskId);
  logger.log(`Fetching from API: ${url}`);
  const response = await axios.get(url, {
    headers: {
      "x-api-key": apiKey,
    },
    timeout: API_CONFIG.REQUEST_TIMEOUT,
  });
  const taskData = response.data;
  if (taskData.status === 'completed') {
    cacheCompletedTask(taskId, taskData);
  }
  return { taskData };
}

/** Result for a task that is not `completed`; failed tasks are flagged with `isError`. */
function incompleteTaskResult(taskData) {
  const hasFailed = taskData.status === 'failed';
  return jsonResult({
    taskId: taskData.id,
    status: taskData.status,
    message: hasFailed ? taskData.error || 'Task failed' : 'Task not yet completed',
  }, hasFailed);
}

/** Renders a completed task as a markdown report. */
function buildMarkdownReport(taskData, includeMetadata, includeCitations) {
  let markdown = '';
  // Title and metadata
  if (includeMetadata) {
    markdown += `# Research Report\n\n`;
    markdown += `**Task ID:** ${taskData.id}\n`;
    markdown += `**Generated:** ${new Date().toISOString()}\n`;
    markdown += `**Status:** ${taskData.status}\n\n`;
    markdown += `---\n\n`;
  }
  // Instructions
  markdown += `## Research Instructions\n\n`;
  markdown += `${taskData.instructions}\n\n`;
  markdown += `---\n\n`;
  // Research results, one section per data field
  if (taskData.data) {
    markdown += `## Research Results\n\n`;
    for (const [key, value] of Object.entries(taskData.data)) {
      const title = toTitleCase(key);
      markdown += `### ${title}\n\n`;
      if (typeof value === 'string') {
        markdown += `${value}\n\n`;
      } else {
        markdown += `\`\`\`json\n${JSON.stringify(value, null, 2)}\n\`\`\`\n\n`;
      }
      // Citations for this field; non-array values are skipped rather than crashing.
      const fieldCitations = includeCitations && taskData.citations
        ? ownValue(taskData.citations, key)
        : undefined;
      if (Array.isArray(fieldCitations) && fieldCitations.length > 0) {
        markdown += `#### Sources for ${title}\n\n`;
        fieldCitations.forEach((citation, index) => {
          markdown += `${index + 1}. [${citation.title || 'Source'}](${citation.url})\n`;
          if (citation.snippet) {
            markdown += ` > ${citation.snippet}\n\n`;
          }
        });
        markdown += '\n';
      }
    }
  }
  // All citations section
  if (includeCitations && taskData.citations) {
    const totalCitations = countCitations(taskData.citations);
    if (totalCitations > 0) {
      markdown += `---\n\n`;
      markdown += `## All Citations (${totalCitations} total)\n\n`;
      for (const [field, citations] of Object.entries(taskData.citations)) {
        if (Array.isArray(citations) && citations.length > 0) {
          markdown += `### ${toTitleCase(field)} Sources\n\n`;
          citations.forEach((citation, index) => {
            markdown += `- [${citation.title || `Source ${index + 1}`}](${citation.url})\n`;
          });
          markdown += '\n';
        }
      }
    }
  }
  return markdown;
}

/**
 * Registers the Exa research-task tools on an MCP server:
 * `exa_research_create_task`, `exa_research_check_status`, `exa_research_get_data`,
 * `exa_research_get_citations`, `exa_research_list_tasks` and
 * `exa_research_save_to_markdown`.
 *
 * Every handler resolves to a tool result (`{ content: [{ type: "text", text }] }`);
 * failures are reported with `isError: true` instead of being thrown.
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 */
export function registerResearchTaskTools(server) {
  // Tool to create a research task
  server.tool("exa_research_create_task", "Create a research task using Exa AI's research endpoint. This will return a task ID that can be used to check status and retrieve results.", {
    instructions: z.string().describe("Research instructions describing what you want to research"),
    model: z.string().optional().describe("Research model to use (default: exa-research)"),
    outputSchema: z.any().optional().describe("JSON schema for structured output format"),
    inferSchema: z.boolean().optional().describe("Allow LLM to generate output schema automatically")
  }, async ({ instructions, model, outputSchema, inferSchema }) => {
    const requestId = buildRequestId('research_create');
    const logger = createRequestLogger(requestId, 'exa_research_create_task');
    try {
      const apiKey = readApiKey();
      if (!apiKey) {
        return missingApiKeyResult(logger);
      }
      // `output` is only sent when a schema or the inferSchema flag was supplied.
      const requestBody = {
        instructions,
        ...(model && { model }),
        ...(outputSchema || inferSchema !== undefined ? {
          output: {
            ...(outputSchema && { schema: outputSchema }),
            ...(inferSchema !== undefined && { inferSchema })
          }
        } : {})
      };
      logger.log(`Sending POST request to ${API_CONFIG.RESEARCH_ENDPOINT}`);
      logger.log(`Request body: ${JSON.stringify(requestBody, null, 2)}`);
      const response = await axios.post(API_CONFIG.RESEARCH_ENDPOINT, requestBody, {
        headers: {
          "x-api-key": apiKey,
          "Content-Type": "application/json",
        },
        timeout: API_CONFIG.REQUEST_TIMEOUT,
      });
      logger.log(`Response status: ${response.status}`);
      logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
      logger.complete();
      return jsonResult({
        taskId: response.data.id,
        message: `Research task created successfully. Task ID: ${response.data.id}. Use 'exa_research_check_status' to monitor progress.`
      });
    } catch (error) {
      return failureResult(logger, 'create research task', error);
    }
  });

  // Tool to check research task status
  server.tool("exa_research_check_status", "Check the status of a research task. If completed and response is large, use exa_research_get_data or exa_research_get_citations to retrieve specific parts.", {
    taskId: z.string().describe("The task ID returned from creating a research task")
  }, async ({ taskId }) => {
    const requestId = buildRequestId('research_status');
    const logger = createRequestLogger(requestId, 'exa_research_check_status');
    try {
      const apiKey = readApiKey();
      if (!apiKey) {
        return missingApiKeyResult(logger);
      }
      const url = researchTaskUrl(taskId);
      logger.log(`Sending GET request to ${url}`);
      const response = await axios.get(url, {
        headers: {
          "x-api-key": apiKey,
        },
        timeout: API_CONFIG.REQUEST_TIMEOUT,
      });
      logger.log(`Response status: ${response.status}`);
      logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
      logger.complete();
      const task = response.data;
      if (task.status === 'failed') {
        return jsonResult({
          taskId: task.id,
          status: task.status,
          error: task.error || 'Task failed with no error message'
        }, true);
      }
      // Keep the full payload for the get_data / get_citations / save tools.
      if (task.status === 'completed' && task.data) {
        cacheCompletedTask(taskId, task);
      }
      const fullResponseText = JSON.stringify(task, null, 2);
      const responseSize = fullResponseText.length;
      // Large responses are summarized; the details are fetched via the other tools.
      if (responseSize > 20000) {
        return jsonResult({
          taskId: task.id,
          status: task.status,
          instructions: task.instructions,
          responseSize: responseSize,
          message: "Response is too large for single retrieval. Use the following tools:",
          availableTools: {
            "exa_research_get_data": "Retrieve the research data/results",
            "exa_research_get_citations": "Retrieve citations for specific fields"
          },
          dataFields: task.data ? Object.keys(task.data) : [],
          citationFields: task.citations ? Object.keys(task.citations) : [],
          totalCitations: countCitations(task.citations)
        });
      }
      // Smaller responses are returned in full.
      return textResult(fullResponseText);
    } catch (error) {
      return failureResult(logger, 'check research task status', error);
    }
  });

  // Tool to get research data
  server.tool("exa_research_get_data", "Get the data/results from a completed research task. Use this when the full response is too large.", {
    taskId: z.string().describe("The task ID returned from creating a research task"),
    field: z.string().optional().describe("Specific data field to retrieve (e.g., 'headless_options'). If not specified, returns all data.")
  }, async ({ taskId, field }) => {
    const requestId = buildRequestId('research_data');
    const logger = createRequestLogger(requestId, 'exa_research_get_data');
    try {
      const { taskData, errorResult } = await loadTask(taskId, logger);
      if (errorResult) {
        return errorResult;
      }
      logger.log(`Task status: ${taskData.status}`);
      logger.complete();
      if (taskData.status !== 'completed') {
        return incompleteTaskResult(taskData);
      }
      if (field && taskData.data) {
        return jsonResult({
          taskId: taskData.id,
          field: field,
          // `??` keeps legitimate falsy values (0, "", false); only a missing field maps to null.
          data: ownValue(taskData.data, field) ?? null
        });
      }
      return jsonResult({
        taskId: taskData.id,
        data: taskData.data || {}
      });
    } catch (error) {
      return failureResult(logger, 'get research data', error);
    }
  });

  // Tool to get research citations
  server.tool("exa_research_get_citations", "Get citations from a completed research task. Use this when you need the sources/references.", {
    taskId: z.string().describe("The task ID returned from creating a research task"),
    field: z.string().optional().describe("Specific field to get citations for. If not specified, returns all citations.")
  }, async ({ taskId, field }) => {
    const requestId = buildRequestId('research_citations');
    const logger = createRequestLogger(requestId, 'exa_research_get_citations');
    try {
      const { taskData, errorResult } = await loadTask(taskId, logger);
      if (errorResult) {
        return errorResult;
      }
      logger.log(`Task status: ${taskData.status}`);
      logger.complete();
      if (taskData.status !== 'completed') {
        return incompleteTaskResult(taskData);
      }
      if (field && taskData.citations) {
        const fieldCitations = ownValue(taskData.citations, field);
        return jsonResult({
          taskId: taskData.id,
          field: field,
          citations: fieldCitations || [],
          count: fieldCitations?.length || 0
        });
      }
      return jsonResult({
        taskId: taskData.id,
        citations: taskData.citations || {},
        totalCount: countCitations(taskData.citations)
      });
    } catch (error) {
      return failureResult(logger, 'get research citations', error);
    }
  });

  // Tool to list all research tasks
  server.tool("exa_research_list_tasks", "List all research tasks with their current status.", {
    cursor: z.string().optional().describe("Pagination cursor for fetching more results")
  }, async ({ cursor }) => {
    const requestId = buildRequestId('research_list');
    const logger = createRequestLogger(requestId, 'exa_research_list_tasks');
    try {
      const apiKey = readApiKey();
      if (!apiKey) {
        return missingApiKeyResult(logger);
      }
      const params = {};
      if (cursor) {
        params.cursor = cursor;
      }
      logger.log(`Sending GET request to ${API_CONFIG.RESEARCH_ENDPOINT}`);
      logger.log(`Request params: ${JSON.stringify(params, null, 2)}`);
      const response = await axios.get(API_CONFIG.RESEARCH_ENDPOINT, {
        headers: {
          "x-api-key": apiKey,
        },
        params,
        timeout: API_CONFIG.REQUEST_TIMEOUT,
      });
      logger.log(`Response status: ${response.status}`);
      logger.log(`Response data: ${JSON.stringify(response.data, null, 2)}`);
      logger.complete();
      return jsonResult({
        tasks: response.data.tasks,
        ...(response.data.cursor && { nextCursor: response.data.cursor })
      });
    } catch (error) {
      return failureResult(logger, 'list research tasks', error);
    }
  });

  // Tool to save research results to markdown
  server.tool("exa_research_save_to_markdown", "Save research task results to a markdown file with proper formatting", {
    taskId: z.string().describe("The task ID of the completed research task"),
    filePath: z.string().describe("Full path where to save the markdown file (e.g., /path/to/research.md)"),
    includeMetadata: z.boolean().optional().describe("Include metadata like task ID, timestamp, etc. (default: true)"),
    includeCitations: z.boolean().optional().describe("Include citations in the markdown (default: true)")
  }, async ({ taskId, filePath, includeMetadata = true, includeCitations = true }) => {
    const requestId = buildRequestId('research_save_md');
    const logger = createRequestLogger(requestId, 'exa_research_save_to_markdown');
    try {
      const { taskData, errorResult } = await loadTask(taskId, logger);
      if (errorResult) {
        return errorResult;
      }
      if (taskData.status !== 'completed') {
        return textResult(`Cannot save research that is not completed. Current status: ${taskData.status}`, true);
      }
      const markdown = buildMarkdownReport(taskData, includeMetadata, includeCitations);
      // Ensure the target directory exists before writing.
      await fs.mkdir(path.dirname(filePath), { recursive: true });
      await fs.writeFile(filePath, markdown, 'utf-8');
      logger.log(`Saved markdown to: ${filePath}`);
      logger.complete();
      const stats = await fs.stat(filePath);
      return jsonResult({
        success: true,
        filePath: filePath,
        fileSize: stats.size,
        message: `Research results saved to ${filePath}`,
        linesWritten: markdown.split('\n').length
      });
    } catch (error) {
      return failureResult(logger, 'save research to markdown', error);
    }
  });
}