UNPKG

obsidian-mcp-server

Version:

MCP server for Obsidian vaults — read, write, search, and surgically edit notes, tags, and frontmatter via the Local REST API plugin. STDIO or Streamable HTTP.

389 lines 20 kB
/** * @fileoverview obsidian_search_notes — text/jsonlogic/omnisearch search with * MCP-spec cursor pagination. The `omnisearch` mode is added conditionally by * the entry point only when the Omnisearch plugin's HTTP server is reachable * at startup. Text-mode hits additionally clip per file via `maxMatchesPerHit` * so a single match-heavy note can't blow the response budget — clipped hits * carry `truncated: true` and `totalMatches`. * @module mcp-server/tools/definitions/obsidian-search-notes.tool */ import { tool, z } from '@cyanheads/mcp-ts-core'; import { JsonRpcErrorCode } from '@cyanheads/mcp-ts-core/errors'; import { paginateArray } from '@cyanheads/mcp-ts-core/utils'; import { getObsidianService } from '../../../services/obsidian/obsidian-service.js'; const DEFAULT_PAGE_SIZE = 50; const MAX_PAGE_SIZE = 200; const DEFAULT_MATCHES_PER_HIT = 10; /** Omnisearch's hardwired upstream cap — pagination/limit params are ignored. */ const OMNISEARCH_UPSTREAM_CAP = 50; const CursorSchema = z .string() .optional() .describe('Opaque cursor from a prior response. Omit for the first page. Page size is server-determined; do not assume a fixed value.'); const TextHitSchema = z .object({ filename: z.string().describe('Vault-relative path of the matching note.'), matches: z .array(z .object({ context: z.string().describe('Surrounding text around the match.'), match: z .object({ start: z.number().describe('Match start offset in the surrounding context.'), end: z.number().describe('Match end offset in the surrounding context.'), }) .describe('Match offsets within the context window.'), }) .describe('A single match within a file.')) .describe('Per-match context windows. Capped per file by `maxMatchesPerHit`.'), totalMatches: z .number() .optional() .describe('Total matches in this file. Present only when `matches` was clipped to `maxMatchesPerHit`.'), truncated: z .boolean() .optional() .describe('True when `matches` was clipped to `maxMatchesPerHit`. Use `obsidian_get_note` to read the full file when more context is needed.'), }) .describe('A file with one or more text-search matches.'); const StructuredHitSchema = z .object({ filename: z.string().describe('Vault-relative path of the matching note.'), result: z.unknown().describe('The query result for this file — shape determined by the query.'), }) .describe('A file with a structured (Dataview/JSONLogic) result value.'); const OmnisearchHitSchema = z .object({ filename: z.string().describe('Vault-relative path of the matching note.'), basename: z.string().describe('Note basename without extension.'), score: z.number().describe('BM25 relevance score. Higher is more relevant.'), foundWords: z .array(z.string()) .describe('Query words found in the note. Populated even when no body match exists (e.g. basename-only match), so empty `matches` paired with non-empty `foundWords` is valid.'), matches: z .array(z .object({ match: z.string().describe('The matched substring.'), offset: z.number().describe('Offset of the match within the note body.'), }) .describe('A single match span in the note body.')) .describe('Match positions within the note body. May be empty for basename-only matches.'), excerpt: z .string() .describe('Surrounding-context excerpt with `<mark>` around matches; HTML entities are decoded and `<br>` becomes `\\n`.'), }) .describe('An Omnisearch BM25-ranked hit.'); /** * Build the `obsidian_search_notes` tool. The `omnisearch` mode is included * in the input/output schemas only when `omnisearchReachable` is true so the * LLM never sees it as an option on a deployment where it can't run. Re-probe * requires a server restart. */ export function buildSearchNotesTool({ omnisearchReachable }) { const modeEnum = omnisearchReachable ? ['text', 'jsonlogic', 'omnisearch'] : ['text', 'jsonlogic']; const description = omnisearchReachable ? 'Search the vault by text substring, JSONLogic predicate, or BM25-ranked Omnisearch query. Pick the mode that matches the query shape — `omnisearch` is best for ranked relevance, typo tolerance, and PDF/OCR coverage (via the Text Extractor plugin). Results paginate via opaque cursors: omit `cursor` for the first page, then pass `nextCursor` from the prior response. Text-mode hits additionally clip per file at `maxMatchesPerHit`.' : 'Search the vault by text substring or JSONLogic predicate. Pick the mode that matches the query shape. Results paginate via opaque cursors: omit `cursor` for the first page, then pass `nextCursor` from the prior response. Text-mode hits additionally clip per file at `maxMatchesPerHit`.'; const inputSchema = z.object({ mode: z .enum(modeEnum) .describe(omnisearchReachable ? 'Which search algorithm to run. `text` matches a substring case-insensitively across filenames and note bodies, returning surrounding context windows. `jsonlogic` evaluates a JSONLogic tree against each note, with `var` paths into `path`, `content`, `frontmatter.<key>`, `tags`, and `stat.{ctime,mtime,size}`, plus `glob` and `regexp` operators. `omnisearch` runs a BM25-ranked query via the Omnisearch plugin — supports quoted phrases, `-exclusion`, `path:` / `ext:` filters, typo tolerance, and PDF/OCR (with Text Extractor); upstream caps results at 50.' : 'Which search algorithm to run. `text` matches a substring case-insensitively across filenames and note bodies, returning surrounding context windows. `jsonlogic` evaluates a JSONLogic tree against each note, with `var` paths into `path`, `content`, `frontmatter.<key>`, `tags`, and `stat.{ctime,mtime,size}`, plus `glob` and `regexp` operators.'), query: z .string() .optional() .describe('The query string. Required for `text` and `omnisearch` modes; ignored in `jsonlogic` mode (use `logic` instead — passing a JSONLogic tree here will fail Zod validation since this field must be a string).'), logic: z .record(z.string(), z.unknown()) .optional() .describe('JSONLogic tree. Required for `jsonlogic` mode; ignored in `text` and `omnisearch` modes (use `query` instead — passing a string here will fail Zod validation since this field must be an object).'), contextLength: z .number() .int() .positive() .default(100) .describe('Characters of context on each side of the match (text mode only).'), pathPrefix: z .string() .optional() .describe('Filter returned filenames by prefix (text mode only, applied client-side).'), maxMatchesPerHit: z .number() .int() .positive() .default(DEFAULT_MATCHES_PER_HIT) .describe('Cap on match contexts returned per file in text mode. When clipped, the hit carries `truncated: true` and `totalMatches`.'), cursor: CursorSchema, }); const textBranch = z .object({ mode: z.literal('text').describe('Echoed mode.'), hits: z.array(TextHitSchema).describe('Matching files with per-match context.'), totalCount: z .number() .describe('Total post-path-policy hit count across all pages, before pagination.'), nextCursor: z .string() .optional() .describe('Opaque cursor for the next page. Omitted on the last page (do not treat absent as null).'), }) .describe('Text-search results.'); const jsonlogicBranch = z .object({ mode: z.literal('jsonlogic').describe('Echoed mode.'), hits: z .array(StructuredHitSchema) .describe('Matching files with the JSONLogic result per file.'), totalCount: z .number() .describe('Total post-path-policy hit count across all pages, before pagination.'), nextCursor: z .string() .optional() .describe('Opaque cursor for the next page. Omitted on the last page (do not treat absent as null).'), }) .describe('JSONLogic results.'); const omnisearchBranch = z .object({ mode: z.literal('omnisearch').describe('Echoed mode.'), hits: z.array(OmnisearchHitSchema).describe('BM25-ranked matching files.'), totalCount: z .number() .describe('Total post-path-policy hit count across all pages, before pagination.'), nextCursor: z .string() .optional() .describe('Opaque cursor for the next page. Omitted on the last page (do not treat absent as null).'), truncated: z .boolean() .describe("True when the upstream returned exactly 50 raw hits (Omnisearch's hardwired cap); more matches may exist that are not retrievable. Narrow the query to surface additional results."), }) .describe('Omnisearch BM25 results.'); const branches = omnisearchReachable ? [textBranch, jsonlogicBranch, omnisearchBranch] : [textBranch, jsonlogicBranch]; const outputSchema = z.object({ result: z .discriminatedUnion('mode', [...branches]) .describe('Mode-discriminated search payload.'), }); /** * Declared inline as a `const` tuple so `tool()`'s `const TErrors` generic * captures the literal reason strings — that's what gives the handler its * typed `ctx.fail<'reason'>(...)`. The `omnisearch_unreachable` entry is * declared unconditionally even when `omnisearchReachable` is false; the * branch that throws it only runs in the omnisearch handler path (which * only runs when reachable), so the entry is harmless when unused and * keeps the contract shape stable across deployments. */ const errors = [ { reason: 'path_prefix_invalid_mode', code: JsonRpcErrorCode.ValidationError, when: '`pathPrefix` was provided in a non-text mode (only `text` supports prefix filtering).', recovery: 'Drop pathPrefix or switch mode to text for prefix filtering.', }, { reason: 'query_required', code: JsonRpcErrorCode.ValidationError, when: '`query` is missing for `text` or `omnisearch` mode (required for both).', recovery: 'Pass `query` — substring for text mode, or BM25 query syntax (quoted phrases, `-exclusion`, `path:` / `ext:` filters) for omnisearch.', }, { reason: 'logic_required', code: JsonRpcErrorCode.ValidationError, when: '`logic` is missing for `jsonlogic` mode.', recovery: 'Pass a JSONLogic tree as `logic`, e.g. `{"glob": [{"var": "path"}, "Projects/*.md"]}`.', }, { reason: 'omnisearch_unreachable', code: JsonRpcErrorCode.ServiceUnavailable, when: 'Omnisearch was reachable at startup but is now unreachable (Obsidian quit, plugin disabled, or mobile session).', retryable: true, recovery: 'Restart Obsidian with the Omnisearch plugin enabled, then restart this MCP server so it re-probes the plugin URL.', }, ]; return tool('obsidian_search_notes', { description, annotations: { readOnlyHint: true, idempotentHint: true }, input: inputSchema, output: outputSchema, // Agent-facing context on the success path — reaches structuredContent AND // content[] automatically; no format() entry needed. enrichment: { effectiveQuery: z .string() .optional() .describe('The query string as submitted (text and omnisearch modes only).'), notice: z.string().optional().describe('Recovery guidance when the search returned no hits.'), }, auth: ['tool:obsidian_search_notes:read'], errors, async handler(input, ctx) { const svc = getObsidianService(); if (input.pathPrefix && input.mode !== 'text') { throw ctx.fail('path_prefix_invalid_mode', '`pathPrefix` is only valid in text mode.', { mode: input.mode, ...ctx.recoveryFor('path_prefix_invalid_mode'), }); } const policy = svc.policy; if (input.mode === 'text') { if (!input.query) { throw ctx.fail('query_required', '`query` is required for text mode.', { mode: input.mode, ...ctx.recoveryFor('query_required'), }); } ctx.enrich.echo(input.query); const raw = await svc.searchText(ctx, input.query, input.contextLength); const prefix = input.pathPrefix; const prefixed = prefix ? raw.filter((h) => h.filename.startsWith(prefix)) : raw; const allowed = policy.filterReadable(prefixed); const clipped = allowed.map((h) => clipMatches(h, input.maxMatchesPerHit)); const page = paginate(clipped, input.cursor, ctx); if (page.hits.length === 0) { ctx.enrich.notice(`No matches for "${input.query}"${prefix ? ` under prefix "${prefix}"` : ''}. Try broader terms, a different mode, or check that the path/filter is correct.`); } return { result: { mode: 'text', ...page } }; } if (input.mode === 'jsonlogic') { if (!input.logic) { throw ctx.fail('logic_required', '`logic` (JSONLogic tree) is required for jsonlogic mode.', { mode: input.mode, ...ctx.recoveryFor('logic_required') }); } const raw = await svc.searchJsonLogic(ctx, input.logic); const allowed = policy.filterReadable(raw); const page = paginate(allowed, input.cursor, ctx); if (page.hits.length === 0) { ctx.enrich.notice('No matches for the JSONLogic predicate. Verify the logic tree and field references.'); } return { result: { mode: 'jsonlogic', ...page } }; } // omnisearch — only reachable when omnisearchReachable is true at build time. if (!input.query) { throw ctx.fail('query_required', '`query` is required for omnisearch mode.', { mode: input.mode, ...ctx.recoveryFor('query_required'), }); } ctx.enrich.echo(input.query); const raw = await svc.searchOmnisearch(ctx, input.query); /** * Compute `truncated` against the raw upstream array, before path-policy * filtering — a filtered-down set legitimately under 50 should not be * reported as truncated. */ const truncated = raw.length >= OMNISEARCH_UPSTREAM_CAP; const allowed = policy.filterReadable(raw); const page = paginate(allowed, input.cursor, ctx); if (page.hits.length === 0) { ctx.enrich.notice(`No Omnisearch matches for "${input.query}". Try broader terms, fewer exclusions, or switch to text mode.`); } return { result: { mode: 'omnisearch', ...page, truncated, }, }; }, format: ({ result }) => { const lines = []; const pageInfo = `${result.hits.length} on this page · ${result.totalCount} total`; const cursorInfo = result.nextCursor ? ' · more available' : ''; lines.push(`**Search (${result.mode}) — ${pageInfo}${cursorInfo}**`); if (result.mode === 'omnisearch' && result.truncated) { lines.push(`_Upstream returned the full ${OMNISEARCH_UPSTREAM_CAP}-hit cap; more matches may exist. Narrow the query to surface them._`); } if (result.nextCursor) { lines.push(`_Next page cursor: \`${result.nextCursor}\`_`); } lines.push(''); if (result.mode === 'text') { for (const h of result.hits) { const trunc = h.truncated ? ` — truncated, showing first ${h.matches.length} of ${h.totalMatches} matches` : ''; lines.push(`### ${h.filename}${trunc}`); for (const m of h.matches) { lines.push(`- match[${m.match.start}–${m.match.end}]: ${truncate(m.context, 240)}`); } } } else if (result.mode === 'omnisearch') { for (const h of result.hits) { lines.push(`### ${h.filename} (score: ${h.score.toFixed(2)})`); if (h.foundWords.length > 0) { lines.push(`**Matched:** ${h.foundWords.map((w) => `\`${w}\``).join(', ')}`); } if (h.excerpt) lines.push(`> ${h.excerpt.replace(/\n/g, '\n> ')}`); } } else { for (const h of result.hits) { lines.push(`### ${h.filename}`); lines.push(`result:`); lines.push('```json'); lines.push(safeJsonStringify(h.result)); lines.push('```'); } } return [{ type: 'text', text: lines.join('\n') }]; }, }); } /** * Static specimen for the MCP definition linter (which duck-types tool * exports out of each `.tool.ts` file) and for existing tests that import * the tool directly. Defaults to `omnisearchReachable: false` — the safe * baseline that doesn't assume the optional plugin is installed. The entry * point (`src/index.ts`) builds the live tool via `buildSearchNotesTool` * with the actual probe result; this export is not the registered tool. * The omnisearch-enabled variant is exercised by tests rather than the * linter (two exports under the same tool name would collide on * `name-unique`). */ export const obsidianSearchNotes = buildSearchNotesTool({ omnisearchReachable: false }); /** * Apply MCP-spec cursor pagination to a fully assembled, post-filter result * array. Returns the page's hits, the total pre-pagination count, and * `nextCursor` (omitted on the last page). Localizes the `Context` → * `RequestContext` cast — `paginateArray`'s signature requires the index- * signature shape that handler-facing `Context` doesn't carry. */ function paginate(items, cursor, ctx) { const page = paginateArray(items, cursor, DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE, ctx); return { hits: page.items, totalCount: page.totalCount ?? items.length, ...(page.nextCursor !== undefined ? { nextCursor: page.nextCursor } : {}), }; } function clipMatches(hit, cap) { if (hit.matches.length <= cap) return hit; return { ...hit, matches: hit.matches.slice(0, cap), truncated: true, totalMatches: hit.matches.length, }; } function truncate(s, n) { if (s.length <= n) return s; return `${s.slice(0, n)}…`; } function safeJsonStringify(v) { try { return JSON.stringify(v, null, 2); } catch { return String(v); } } //# sourceMappingURL=obsidian-search-notes.tool.js.map