@aipmanager/search-mcp
Version:
MCP server providing Cursor-like Search tools: read file, list dir, grep, search files, semantic code search, web search
297 lines (296 loc) • 14 kB
JavaScript
#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import fs from "fs/promises";
import path from "path";
import fg from "fast-glob";
import OpenAI from "openai";
import { MilvusClient, DataType, MetricType } from "@zilliz/milvus2-sdk-node";
import { Pinecone } from "@pinecone-database/pinecone";
// ---------------- Env & Helpers ----------------
/**
 * Runtime configuration, captured once from process.env when the module loads.
 * A variable that is unset or empty falls through to the next candidate, and
 * finally to the listed default (or to undefined when there is no default).
 */
const ENV = (() => {
  const { env } = process;
  // First truthy candidate wins; undefined when none is truthy (same as `a || b || undefined`).
  const firstSet = (...candidates) => candidates.find(Boolean);

  return {
    // NOTE(review): INDEXER_* takes precedence here, while the embedding
    // settings below prefer SEARCH_* — confirm the asymmetry is intended.
    VECTOR_PROVIDER: firstSet(env.INDEXER_VECTOR_PROVIDER, env.SEARCH_VECTOR_PROVIDER, "milvus"),

    // Milvus
    MILVUS_ADDRESS: firstSet(env.MILVUS_ADDRESS, "localhost:19530"),
    MILVUS_USERNAME: firstSet(env.MILVUS_USERNAME),
    MILVUS_PASSWORD: firstSet(env.MILVUS_PASSWORD),
    MILVUS_COLLECTION: firstSet(env.MILVUS_COLLECTION, "code_chunks"),
    MILVUS_VECTOR_FIELD: firstSet(env.MILVUS_VECTOR_FIELD, "vector"),
    MILVUS_METRIC: firstSet(env.MILVUS_METRIC, "IP"),

    // Pinecone
    PINECONE_API_KEY: firstSet(env.PINECONE_API_KEY),
    PINECONE_INDEX: firstSet(env.PINECONE_INDEX),
    PINECONE_NAMESPACE: firstSet(env.PINECONE_NAMESPACE, "default"),

    // Embedding
    OPENAI_API_KEY: firstSet(env.OPENAI_API_KEY),
    SILICONFLOW_API_KEY: firstSet(env.SILICONFLOW_API_KEY),
    EMBED_PROVIDER: firstSet(env.SEARCH_EMBED_PROVIDER, env.INDEXER_EMBED_PROVIDER),
    EMBED_BASE_URL: firstSet(env.SEARCH_EMBED_BASE_URL, env.INDEXER_EMBED_BASE_URL),
    EMBED_MODEL: firstSet(env.SEARCH_EMBED_MODEL, env.INDEXER_EMBED_MODEL, "text-embedding-3-small"),

    // Web Search (Google CSE)
    GOOGLE_API_KEY: firstSet(env.GOOGLE_API_KEY),
    GOOGLE_CSE_ID: firstSet(env.GOOGLE_CSE_ID),
  };
})();
/**
 * Extracts the base directory from the command-line arguments.
 *
 * Accepted spellings: `--baseDir=<dir>`, `--base-dir=<dir>`,
 * `--baseDir <dir>` and `--base-dir <dir>`. The first matching argument wins.
 *
 * @returns {string | undefined} The raw value exactly as given on the command
 *   line, or undefined when no base-dir flag (or no value after it) is present.
 */
function parseBaseDirFromArgv() {
  const args = process.argv.slice(2);
  const inlinePrefixes = ["--baseDir=", "--base-dir="];
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const prefix = inlinePrefixes.find((candidate) => arg.startsWith(candidate));
    if (prefix !== undefined) {
      // Keep everything after the first "=" so a directory whose name
      // itself contains "=" is not truncated.
      return arg.slice(prefix.length);
    }
    if (arg === "--baseDir" || arg === "--base-dir") {
      return args[i + 1];
    }
  }
  return undefined;
}
/**
 * Normalized absolute root that file access is confined to.
 * Taken from the base-dir command-line flag when present, otherwise the
 * current working directory; relative values are joined onto the working directory.
 */
const BASE_DIR = (() => {
  const requested = parseBaseDirFromArgv() || process.cwd();
  if (path.isAbsolute(requested)) {
    return path.normalize(requested);
  }
  const anchored = path.join(process.cwd(), requested);
  return path.normalize(anchored);
})();
/**
 * Throws when `absPath` lies outside BASE_DIR.
 *
 * The check is purely lexical (based on `path.relative`); symbolic links are
 * not resolved here.
 *
 * @param {string} absPath - Absolute, normalized path to validate.
 * @throws {Error} If the path escapes the base directory.
 */
function assertWithinBaseDir(absPath) {
  const rel = path.relative(BASE_DIR, absPath);
  // Only a leading ".." path *segment* means "outside". A bare prefix test
  // would also reject legitimate in-base names such as "..env.backup".
  const climbsOut = rel === ".." || rel.startsWith(`..${path.sep}`);
  // path.relative yields an absolute path when no relative route exists
  // (e.g. a different drive on Windows).
  if (climbsOut || path.isAbsolute(rel)) {
    throw new Error(`访问越界:路径不在 baseDir 范围内 (${BASE_DIR})`);
  }
}
/**
 * Resolves a user-supplied path against BASE_DIR and verifies it stays inside it.
 *
 * @param {string} p - Absolute path, or path relative to BASE_DIR.
 * @returns {string} The normalized absolute path. A falsy `p` is handed back
 *   unchanged and is NOT checked against the base directory.
 * @throws {Error} Propagated from assertWithinBaseDir when the path escapes BASE_DIR.
 */
function toAbsolute(p) {
  if (!p) {
    return p;
  }
  const anchored = path.isAbsolute(p) ? p : path.join(BASE_DIR, p);
  const resolved = path.normalize(anchored);
  assertWithinBaseDir(resolved);
  return resolved;
}
/**
 * Reads a UTF-8 text file and returns a window of its lines.
 *
 * @param {string} filePath - Path accepted by toAbsolute.
 * @param {number} [offset] - Zero-based first line; clamped to [0, totalLines]. Defaults to 0.
 * @param {number} [limit] - Maximum number of lines; defaults to all lines.
 * @returns {Promise<{ text: string, totalLines: number }>} The selected lines
 *   joined with "\n", plus the line count of the whole file.
 */
async function readTextFileLimited(filePath, offset, limit) {
  const resolved = toAbsolute(filePath);
  const raw = await fs.readFile(resolved, "utf8");
  const allLines = raw.split(/\r?\n/);
  const totalLines = allLines.length;

  const requestedStart = offset ?? 0;
  const start = Math.max(0, Math.min(requestedStart, totalLines));

  const requestedCount = limit ?? totalLines;
  // The outer max keeps `end` from dropping below `start` when the count is negative.
  const end = Math.max(start, Math.min(start + requestedCount, totalLines));

  const text = allLines.slice(start, end).join("\n");
  return { text, totalLines };
}
/**
 * Line-by-line regular-expression search over the files matched under `baseDir`.
 *
 * @param {string} baseDir - Directory used as the glob working directory.
 * @param {string} pattern - Source of the regular expression tested against each line.
 * @param {object} opts
 * @param {string[]} [opts.include] - Globs to search; an empty or missing list means every file.
 * @param {string[]} [opts.exclude] - Globs to ignore; when missing, node_modules, .git, dist and build are ignored.
 * @param {boolean} [opts.caseInsensitive] - Adds the "i" flag.
 * @param {boolean} [opts.multiline] - Adds the "m" flag.
 * @param {number} [opts.contextBefore] - Preceding lines to attach, clamped to 0..20.
 * @param {number} [opts.contextAfter] - Following lines to attach, clamped to 0..20.
 * @param {number} [opts.headLimit] - Maximum hits, clamped to 1..2000 (default 200).
 * @returns {Promise<Array<{ file: string, line: number, match: string, before?: string[], after?: string[] }>>}
 *   Hits in discovery order; `file` is relative to BASE_DIR and `line` is 1-based.
 */
async function grepInFiles(baseDir, pattern, opts) {
  const clamp = (value, fallback, lowest, highest) =>
    Math.max(lowest, Math.min(value ?? fallback, highest));

  const hasInclude = opts.include && opts.include.length > 0;
  const includeGlobs = hasInclude ? opts.include : ["**/*"];
  const ignoreGlobs = opts.exclude || ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"];
  const candidates = await fg(includeGlobs, {
    cwd: baseDir,
    ignore: ignoreGlobs,
    dot: false,
    onlyFiles: true,
    unique: true,
  });

  const flags = (opts.caseInsensitive ? "i" : "") + (opts.multiline ? "m" : "");
  const matcher = new RegExp(pattern, flags);

  const maxHits = clamp(opts.headLimit, 200, 1, 2000);
  const linesBefore = clamp(opts.contextBefore, 0, 0, 20);
  const linesAfter = clamp(opts.contextAfter, 0, 0, 20);

  const hits = [];
  for (const relative of candidates) {
    const absolute = path.join(baseDir, relative);
    let body;
    try {
      body = await fs.readFile(absolute, "utf8");
    } catch {
      // Files that cannot be read are skipped.
      continue;
    }

    const lines = body.split(/\r?\n/);
    const file = path.relative(BASE_DIR, absolute);
    for (const [idx, lineText] of lines.entries()) {
      if (!matcher.test(lineText)) {
        continue;
      }
      const before = linesBefore > 0
        ? lines.slice(Math.max(0, idx - linesBefore), idx)
        : undefined;
      const after = linesAfter > 0
        ? lines.slice(idx + 1, Math.min(lines.length, idx + 1 + linesAfter))
        : undefined;
      hits.push({ file, line: idx + 1, match: lineText, before, after });
      if (hits.length >= maxHits) {
        return hits;
      }
    }
  }
  return hits;
}
/**
 * Lists the entries of a directory after resolving it through toAbsolute.
 *
 * @param {string} dir - Directory path accepted by toAbsolute.
 * @returns {Promise<Array<{ name: string, type: "dir" | "file" | "other" }>>}
 */
async function listDirSafe(dir) {
  const target = toAbsolute(dir);
  const dirents = await fs.readdir(target, { withFileTypes: true });

  const classify = (entry) => {
    if (entry.isDirectory()) {
      return "dir";
    }
    if (entry.isFile()) {
      return "file";
    }
    return "other";
  };

  return dirents.map((entry) => ({ name: entry.name, type: classify(entry) }));
}
/**
 * Finds files under `baseDir` whose relative path contains `query`, ignoring case.
 *
 * @param {string} baseDir - Directory to scan (node_modules, .git, dist and build are ignored).
 * @param {string} query - Substring looked for in each file's path.
 * @param {number} [limit=200] - Passed to `slice` as the end index of the match list.
 * @returns {Promise<string[]>} Matching paths, expressed relative to BASE_DIR.
 */
async function fuzzySearchFiles(baseDir, query, limit = 200) {
  const ignored = ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"];
  const everyFile = await fg(["**/*"], { cwd: baseDir, ignore: ignored, onlyFiles: true, dot: false });

  const needle = query.toLowerCase();
  const containsNeedle = (candidate) => candidate.toLowerCase().includes(needle);
  // Re-express each hit relative to the global BASE_DIR rather than to `baseDir`.
  const relativeToRoot = (candidate) => path.relative(BASE_DIR, path.join(baseDir, candidate));

  return everyFile
    .filter(containsNeedle)
    .slice(0, limit)
    .map(relativeToRoot);
}
// Embedding utility
/**
 * Embeds a batch of texts through an OpenAI-compatible embeddings endpoint.
 *
 * Provider selection: ENV.EMBED_PROVIDER when set; otherwise "siliconflow" if a
 * SiliconFlow key is configured, else "openai". Any value other than
 * "siliconflow" or "custom" is treated as "openai".
 *
 * @param {string[]} texts - Inputs to embed.
 * @returns {Promise<number[][]>} One embedding per item of the response data.
 * @throws {Error} When the custom provider has no base URL, or no API key is available.
 */
async function embedTexts(texts) {
  const inferredProvider = ENV.SILICONFLOW_API_KEY ? 'siliconflow' : 'openai';
  const provider = (ENV.EMBED_PROVIDER || inferredProvider).toLowerCase();

  let baseURL = ENV.EMBED_BASE_URL;
  let apiKey;
  switch (provider) {
    case 'siliconflow':
      apiKey = ENV.SILICONFLOW_API_KEY;
      baseURL = baseURL || 'https://api.siliconflow.cn/v1';
      break;
    case 'custom':
      // A custom endpoint may authenticate with either configured key.
      apiKey = ENV.OPENAI_API_KEY || ENV.SILICONFLOW_API_KEY;
      if (!baseURL) {
        throw new Error('SEARCH_EMBED_BASE_URL is required for custom provider');
      }
      break;
    default:
      // Plain OpenAI.
      apiKey = ENV.OPENAI_API_KEY;
  }

  if (!apiKey) {
    throw new Error('Embedding provider API key is missing');
  }

  const clientOptions = baseURL ? { apiKey, baseURL } : { apiKey };
  const client = new OpenAI(clientOptions);
  const response = await client.embeddings.create({ model: ENV.EMBED_MODEL, input: texts });
  return response.data.map((item) => item.embedding);
}
/**
 * Parses the `meta_json` field of a Milvus hit without letting one bad record
 * abort the whole search.
 *
 * @param {unknown} raw - Stored JSON text (may be empty or missing).
 * @param {unknown} id - Hit identifier, used only in the diagnostic message.
 * @returns {unknown} The parsed value, or undefined when absent or malformed.
 */
function parseHitMeta(raw, id) {
  if (!raw) {
    return undefined;
  }
  try {
    return JSON.parse(raw);
  } catch (err) {
    // Diagnostics go to stderr; this server is connected over stdio.
    console.error(`[SearchMCP] ignoring malformed meta_json for hit ${id}:`, err);
    return undefined;
  }
}

/**
 * Embeds `query` and runs a nearest-neighbour search against the configured
 * vector store (Pinecone when ENV.VECTOR_PROVIDER is "pinecone", Milvus otherwise).
 *
 * @param {string} query - Natural-language search text.
 * @param {number} [topK=8] - Number of neighbours requested from the store.
 * @returns {Promise<object[]>} Pinecone: `{ id, score, metadata }` per match.
 *   Milvus: `{ id, score, repo, path, url, commit, meta }` per hit.
 * @throws {Error} When Pinecone is selected but its API key or index is not
 *   configured; embedding and search errors propagate.
 */
async function semanticSearch(query, topK = 8) {
  const [vec] = await embedTexts([query]);

  if ((ENV.VECTOR_PROVIDER || "milvus").toLowerCase() === "pinecone") {
    if (!ENV.PINECONE_API_KEY || !ENV.PINECONE_INDEX) {
      throw new Error("Pinecone env PINECONE_API_KEY/PINECONE_INDEX required");
    }
    const pc = new Pinecone({ apiKey: ENV.PINECONE_API_KEY });
    const index = pc.index(ENV.PINECONE_INDEX).namespace(ENV.PINECONE_NAMESPACE || "default");
    const q = await index.query({ topK, vector: vec, includeMetadata: true });
    return (q.matches || []).map((m) => ({ id: m.id, score: m.score, metadata: m.metadata }));
  }

  // Default: Milvus. A client is created per call, so it must be closed again
  // on every exit path or each search leaks a connection.
  const client = new MilvusClient({
    address: ENV.MILVUS_ADDRESS,
    username: ENV.MILVUS_USERNAME,
    password: ENV.MILVUS_PASSWORD,
    ssl: false,
  });
  try {
    try {
      await client.loadCollectionSync({ collection_name: ENV.MILVUS_COLLECTION });
    } catch (err) {
      // Loading stays best-effort: keep going and let the search itself fail
      // if the collection is unusable, but leave a trace of the load error.
      console.error("[SearchMCP] loadCollectionSync failed, continuing with search:", err);
    }

    const res = await client.search({
      collection_name: ENV.MILVUS_COLLECTION,
      vectors: [vec],
      vector_type: DataType.FloatVector,
      anns_field: ENV.MILVUS_VECTOR_FIELD || 'vector',
      topk: topK,
      metric_type: MetricType[ENV.MILVUS_METRIC] || MetricType.IP,
      params: { nprobe: 10 },
      output_fields: ["id", "repo", "path", "url", "commit", "meta_json"],
    });

    const hits = res.results || res.results_fields || [];
    return hits.map((hit) => {
      // Some result shapes nest the output fields under `fields`, others are flat.
      const fields = hit.fields || hit;
      return {
        id: fields.id,
        score: hit.score || fields.distance || 0,
        repo: fields.repo,
        path: fields.path,
        url: fields.url,
        commit: fields.commit,
        meta: parseHitMeta(fields.meta_json, fields.id),
      };
    });
  } finally {
    try {
      await client.closeConnection();
    } catch (err) {
      // A failed close must not mask the search result or the original error.
      console.error("[SearchMCP] failed to close Milvus connection:", err);
    }
  }
}
/**
 * Runs a web search through the Google Custom Search JSON API.
 *
 * @param {string} query - Search text.
 * @param {number} [num=5] - Desired result count; clamped to 1..10 before sending.
 * @returns {Promise<{ results: Array<{ title: string, link: string, snippet: string }>, warning?: string }>}
 *   When the API key or engine id is not configured, an empty result list with a `warning`.
 * @throws {Error} When the API answers with a non-success HTTP status, so a
 *   quota or credential failure is not mistaken for "no results".
 */
async function googleCseSearch(query, num = 5) {
  if (!ENV.GOOGLE_API_KEY || !ENV.GOOGLE_CSE_ID) {
    return { warning: "未配置 GOOGLE_API_KEY/GOOGLE_CSE_ID,返回占位结果。", results: [] };
  }

  const url = new URL("https://www.googleapis.com/customsearch/v1");
  url.searchParams.set("key", ENV.GOOGLE_API_KEY);
  url.searchParams.set("cx", ENV.GOOGLE_CSE_ID);
  url.searchParams.set("q", query);
  url.searchParams.set("num", String(Math.max(1, Math.min(num, 10))));

  const res = await fetch(url.toString());
  if (!res.ok) {
    let detail = res.statusText;
    try {
      const body = await res.json();
      detail = body?.error?.message || detail;
    } catch {
      // Error body was not JSON; the HTTP status text is the best detail available.
    }
    throw new Error(`Google CSE request failed (${res.status}): ${detail}`);
  }

  const data = await res.json();
  const results = (data.items || []).map(({ title, link, snippet }) => ({ title, link, snippet }));
  return { results };
}
// ---------------- MCP Server ----------------
// Single server instance; every tool below is registered on it.
const server = new McpServer({
  name: "aipm-search",
  version: "1.0.0",
  description: "Search MCP: read/list, grep, search files, semantic code search, web search",
});
// Read File
// Tool "readFile": returns a line window of one file plus the file's total line count.
server.registerTool(
  "readFile",
  {
    title: "读取文件(可限制行数)",
    description: "读取指定文件内容,可通过 offset 与 limit 控制返回行范围。",
    inputSchema: {
      path: z.string().describe("文件绝对或相对路径"),
      offset: z.number().int().optional().describe("起始行,从0开始,可选"),
      limit: z.number().int().optional().describe("返回的最大行数,可选"),
    },
  },
  async (args) => {
    const page = await readTextFileLimited(args.path, args.offset, args.limit);
    const textPart = { type: "text", text: page.text };
    // NOTE(review): the line count travels in a `meta` key — confirm that
    // clients of this server actually receive and read that key.
    return { content: [textPart], meta: { totalLines: page.totalLines } };
  },
);
// List Directory
// Tool "listDirectory": returns the directory entries as pretty-printed JSON text.
server.registerTool(
  "listDirectory",
  {
    title: "列出目录",
    description: "列出目录内容(文件/文件夹)",
    inputSchema: {
      path: z.string().describe("目录路径"),
    },
  },
  async (args) => {
    const listing = await listDirSafe(args.path);
    const text = JSON.stringify(listing, null, 2);
    return { content: [{ type: "text", text }] };
  },
);
// Grep
// Tool "grepSearch": regex search over files; hits are returned as pretty-printed JSON text.
server.registerTool(
  "grepSearch",
  {
    title: "Grep 搜索",
    description: "在代码中使用正则查找匹配的行,支持大小写与多行模式、上下文和数量限制。",
    inputSchema: {
      baseDir: z.string().optional().describe("搜索起始目录,默认当前工作目录"),
      pattern: z.string().describe("正则表达式"),
      include: z.array(z.string()).optional().describe("包含的glob列表,默认 **/*"),
      exclude: z.array(z.string()).optional().describe("排除的glob列表"),
      caseInsensitive: z.boolean().optional(),
      multiline: z.boolean().optional(),
      contextBefore: z.number().int().optional(),
      contextAfter: z.number().int().optional(),
      headLimit: z.number().int().optional(),
    },
  },
  async (args) => {
    // A missing baseDir resolves to "." under BASE_DIR.
    const searchRoot = toAbsolute(args.baseDir || ".");
    const grepOptions = {
      include: args.include,
      exclude: args.exclude,
      caseInsensitive: args.caseInsensitive,
      multiline: args.multiline,
      contextBefore: args.contextBefore,
      contextAfter: args.contextAfter,
      headLimit: args.headLimit,
    };
    const matches = await grepInFiles(searchRoot, args.pattern, grepOptions);
    const text = JSON.stringify(matches, null, 2);
    return { content: [{ type: "text", text }] };
  },
);
// Search Files (fuzzy by name)
// Tool "searchFiles": case-insensitive substring match on file paths, returned as JSON text.
server.registerTool(
  "searchFiles",
  {
    title: "按文件名模糊搜索",
    description: "基于文件名的模糊匹配,返回相对权重的前若干个文件路径。",
    inputSchema: {
      baseDir: z.string().optional().describe("起始目录,默认当前工作目录"),
      query: z.string().describe("查询词,将执行文件名包含匹配"),
      limit: z.number().int().optional().describe("返回数量上限,默认200"),
    },
  },
  async (args) => {
    // A missing baseDir resolves to "." under BASE_DIR.
    const searchRoot = toAbsolute(args.baseDir || ".");
    const found = await fuzzySearchFiles(searchRoot, args.query, args.limit);
    const text = JSON.stringify(found, null, 2);
    return { content: [{ type: "text", text }] };
  },
);
// Codebase semantic search
// Tool "codebaseSearch": vector search over indexed code, returned as JSON text.
server.registerTool(
  "codebaseSearch",
  {
    title: "语义搜索代码库",
    description: "基于向量数据库(Milvus/Pinecone)的代码片段语义检索",
    inputSchema: {
      query: z.string().describe("检索语句"),
      topK: z.number().int().optional().describe("返回条数,默认为8"),
    },
  },
  async (args) => {
    const matches = await semanticSearch(args.query, args.topK);
    const text = JSON.stringify(matches, null, 2);
    return { content: [{ type: "text", text }] };
  },
);
// Web search via Google CSE if configured
// Tool "webSearch": forwards to googleCseSearch and returns its payload as JSON text.
server.registerTool(
  "webSearch",
  {
    title: "Web 搜索",
    description: "调用 Google CSE(需配置 GOOGLE_API_KEY/GOOGLE_CSE_ID),否则返回占位结果。",
    inputSchema: {
      query: z.string().describe("检索语句"),
      num: z.number().int().optional().describe("返回条数,1-10,默认5"),
    },
  },
  async (args) => {
    const payload = await googleCseSearch(args.query, args.num);
    const text = JSON.stringify(payload, null, 2);
    return { content: [{ type: "text", text }] };
  },
);
/**
 * Connects the MCP server to a stdio transport.
 * On failure the error is reported on stderr and the process exits with code 1.
 */
async function main() {
  try {
    const transport = new StdioServerTransport();
    await server.connect(transport);
    // With a stdio transport, stdout is the protocol channel: anything printed
    // there is read by the client as a (malformed) message. Status output
    // therefore goes to stderr.
    // eslint-disable-next-line no-console
    console.error("[SearchMCP] ready on stdio");
  }
  catch (err) {
    // eslint-disable-next-line no-console
    console.error("[SearchMCP] failed to start:", err);
    process.exit(1);
  }
}
main();