firecrawl-mcp
Version:
MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.
355 lines (335 loc) • 13.1 kB
JavaScript
/**
* Firecrawl Monitor tools.
*
* Monitors run recurring scrapes/crawls and diff each result against the last
* retained snapshot. The SDK exposes monitor methods, but its HttpClient
* injects a top-level `origin` field into every POST/PATCH body and
* /v2/monitor rejects that with "Unrecognized key in body". Until the SDK
* strips `origin` for monitor requests, we hit /v2/monitor directly via fetch
* — same pattern the CLI uses.
*/
import { z } from 'zod';
/** Base URL of the hosted Firecrawl API, used when FIRECRAWL_API_URL is not configured. */
const DEFAULT_API_URL = 'https://api.firecrawl.dev';
/**
 * Resolve the credentials and API base URL for a monitor request.
 *
 * The API key is taken from the session's `firecrawlApiKey`, falling back to
 * the `FIRECRAWL_API_KEY` environment variable. The base URL is taken from
 * `FIRECRAWL_API_URL`, falling back to the hosted API.
 *
 * @param {{ firecrawlApiKey?: string } | null | undefined} session - Per-session
 *   settings; may be absent.
 * @returns {{ apiKey: string | undefined, baseUrl: string }} `baseUrl` never
 *   ends with a slash, so callers can append `/v2/...` directly.
 */
function resolveAuth(session) {
    const apiKey = session?.firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY;
    // A blank FIRECRAWL_API_URL counts as "not configured". Using it verbatim
    // would yield an empty base URL and a relative request URL like "/v2/monitor".
    const configuredUrl = process.env.FIRECRAWL_API_URL?.trim();
    // Strip every trailing slash so "host//" cannot produce "host//v2/...".
    const baseUrl = (configuredUrl || DEFAULT_API_URL).replace(/\/+$/, '');
    return { apiKey, baseUrl };
}
/**
 * Build the error message for a failed monitor request.
 *
 * Prefers the `error` field of the response payload. String errors are used
 * verbatim; structured errors (objects or arrays) are serialized as JSON so
 * they do not collapse to "[object Object]". Falls back to the HTTP status
 * line when the payload carries no usable error.
 *
 * @param {{ status: number, statusText: string }} response - The fetch response.
 * @param {any} payload - Parsed response body (`{}` when the body was not JSON).
 * @returns {string} Message suitable for `new Error(...)`.
 */
function describeMonitorFailure(response, payload) {
    const reported = payload?.error;
    if (reported) {
        return typeof reported === 'object' ? JSON.stringify(reported) : String(reported);
    }
    return `HTTP ${response.status}: ${response.statusText || 'Request failed'}`;
}
/**
 * Send a request to the Firecrawl `/v2` API and return the parsed JSON payload.
 *
 * @param {object | null | undefined} session - Session passed to `resolveAuth`
 *   to obtain the API key and base URL.
 * @param {string} path - Path appended after `/v2`, e.g. `/monitor`.
 * @param {object} [init] - Request options.
 * @param {string} [init.method] - HTTP method; defaults to `GET`.
 * @param {Record<string, unknown>} [init.query] - Query parameters; entries
 *   that are `undefined`, `null` or `''` are omitted.
 * @param {unknown} [init.body] - JSON-serializable body; sent only when defined.
 * @returns {Promise<any>} The parsed response body (`{}` if it was not JSON).
 * @throws {Error} When no API key is available and `FIRECRAWL_API_URL` is
 *   unset, when the response status is not OK, or when the payload reports
 *   `success: false`.
 */
async function monitorRequest(session, path, init = {}) {
    const { apiKey, baseUrl } = resolveAuth(session);
    // A key is mandatory unless FIRECRAWL_API_URL is set (presumably a
    // self-hosted instance that may run without authentication).
    if (!apiKey && !process.env.FIRECRAWL_API_URL) {
        throw new Error('Unauthorized: API key is required for monitor requests');
    }
    let url = `${baseUrl}/v2${path}`;
    if (init.query) {
        const params = new URLSearchParams();
        for (const [key, value] of Object.entries(init.query)) {
            // Unset filters are dropped rather than sent as "undefined" or "".
            if (value !== undefined && value !== null && value !== '') {
                params.set(key, String(value));
            }
        }
        const queryString = params.toString();
        if (queryString) {
            url += `?${queryString}`;
        }
    }
    const hasBody = init.body !== undefined;
    // The client is identified through a header; the body is sent exactly as
    // given, with no extra fields added.
    const headers = { 'X-Origin': 'mcp' };
    if (apiKey) {
        headers.Authorization = `Bearer ${apiKey}`;
    }
    if (hasBody) {
        headers['Content-Type'] = 'application/json';
    }
    const response = await fetch(url, {
        method: init.method ?? 'GET',
        headers,
        body: hasBody ? JSON.stringify(init.body) : undefined,
    });
    // Deliberately tolerant: an empty or non-JSON body is treated as `{}` so
    // the status check below still decides success or failure.
    const payload = await response.json().catch(() => ({}));
    if (!response.ok || payload?.success === false) {
        throw new Error(describeMonitorFailure(response, payload));
    }
    return payload;
}
/**
 * Render a value as pretty-printed JSON with a two-space indent, the text
 * form returned by every monitor tool.
 *
 * @param {unknown} data - Value to serialize.
 * @returns {string} The indented JSON text.
 */
function asText(data) {
    const indentWidth = 2;
    const serialized = JSON.stringify(data, null, indentWidth);
    return serialized;
}
// Allowed values for the `pageStatus` filter of `firecrawl_monitor_check`.
// They mirror the per-page `status` values listed in that tool's description.
const pageStatusSchema = z.enum(['same', 'new', 'changed', 'removed', 'error']);
/**
 * Register the Firecrawl monitor tools on an MCP server.
 *
 * Adds eight tools via `server.addTool`: create, list, get, update, delete,
 * run, checks (list) and check (get one). Each tool forwards its arguments to
 * the matching `/monitor` endpoint through `monitorRequest` and returns the
 * response as pretty-printed JSON text.
 *
 * @param {{ addTool: (tool: object) => unknown }} server - Object exposing
 *   `addTool`; presumably a FastMCP server instance.
 * @returns {void}
 */
export function registerMonitorTools(server) {
    /**
     * Encode an identifier for use as a single URL path segment.
     *
     * An empty identifier would silently turn `/monitor/{id}` into the
     * collection route `/monitor/` (or `/monitor//run`), so it is rejected
     * here before any request is made.
     *
     * @param {unknown} value - Identifier supplied by the tool caller.
     * @param {string} label - Argument name used in the error message.
     * @returns {string} The URI-encoded identifier.
     * @throws {Error} When `value` is not a non-blank string.
     */
    const pathSegment = (value, label) => {
        if (typeof value !== 'string' || value.trim() === '') {
            throw new Error(`Invalid ${label}: expected a non-empty string`);
        }
        return encodeURIComponent(value);
    };
    // POST /monitor: the body is forwarded to the API as-is.
    server.addTool({
        name: 'firecrawl_monitor_create',
        annotations: {
            title: 'Create monitor',
            readOnlyHint: false,
            openWorldHint: true,
        },
        description: `
Create a Firecrawl monitor — a recurring scrape or crawl that diffs each result against the last retained snapshot.
Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron\` or \`text\`), and \`targets\` (one or more \`{ type: 'scrape', urls: [...] }\` or \`{ type: 'crawl', url: '...' }\`). Optional: \`webhook\`, \`notification\`, \`retentionDays\`.
**Markdown-mode (default):** Each check produces a unified text diff of the page's markdown. No extra configuration needed.
\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Blog watch",
"schedule": { "text": "every 30 minutes", "timezone": "UTC" },
"targets": [{ "type": "scrape", "urls": ["https://example.com/blog"] }],
"notification": { "email": { "enabled": true, "recipients": ["a@b.com"] } }
}
}
}
\`\`\`
**JSON-mode change tracking:** To detect changes in **specific structured fields** (price, headline, in-stock flag, list items) instead of the whole page, add a \`changeTracking\` format with \`modes: ["json"]\` and a JSON schema to the target's \`scrapeOptions.formats\`. The check response will then carry a per-field diff (keyed by JSON path, e.g. \`plans[0].price\`) and a \`snapshot.json\` with the full current extraction. See \`firecrawl_monitor_check\` for the response shape.
\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Pricing watch",
"schedule": { "text": "hourly", "timezone": "UTC" },
"targets": [{
"type": "scrape",
"urls": ["https://example.com/pricing"],
"scrapeOptions": {
"formats": [{
"type": "changeTracking",
"modes": ["json"],
"prompt": "Extract pricing tiers and headline features for each plan.",
"schema": {
"type": "object",
"properties": {
"plans": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"price": { "type": "string" },
"features": { "type": "array", "items": { "type": "string" } }
}
}
}
}
}
}]
}
}]
}
}
}
\`\`\`
**Mixed mode (JSON + git-diff):** Use \`modes: ["json", "git-diff"]\` to get both per-field diffs and a markdown sidecar. The page is marked \`changed\` whenever either surface changed.
`,
        parameters: z.object({
            body: z.record(z.string(), z.any()),
        }),
        execute: async (args, { session, log }) => {
            const { body } = args;
            log.info('Creating monitor', { name: body.name });
            const res = await monitorRequest(session, '/monitor', {
                method: 'POST',
                body,
            });
            return asText(res);
        },
    });
    // GET /monitor: optional limit/offset pagination.
    server.addTool({
        name: 'firecrawl_monitor_list',
        annotations: {
            title: 'List monitors',
            readOnlyHint: true,
            openWorldHint: false,
        },
        description: `
List all Firecrawl monitors for the authenticated account.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_list", "arguments": { "limit": 20 } }
\`\`\`
`,
        parameters: z.object({
            limit: z.number().int().positive().optional(),
            offset: z.number().int().nonnegative().optional(),
        }),
        execute: async (args, { session }) => {
            const { limit, offset } = args;
            const res = await monitorRequest(session, '/monitor', {
                query: { limit, offset },
            });
            return asText(res);
        },
    });
    // GET /monitor/{id}
    server.addTool({
        name: 'firecrawl_monitor_get',
        annotations: {
            title: 'Get monitor',
            readOnlyHint: true,
            openWorldHint: false,
        },
        description: `
Get a single monitor by ID.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_get", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
        parameters: z.object({ id: z.string() }),
        execute: async (args, { session }) => {
            const { id } = args;
            const res = await monitorRequest(session, `/monitor/${pathSegment(id, 'id')}`);
            return asText(res);
        },
    });
    // PATCH /monitor/{id}: the body holds only the fields to change.
    server.addTool({
        name: 'firecrawl_monitor_update',
        annotations: {
            title: 'Update monitor',
            readOnlyHint: false,
            openWorldHint: true,
        },
        description: `
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`webhook\`, \`notification\`, \`retentionDays\`.
**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_update",
"arguments": {
"id": "mon_abc123",
"body": { "status": "paused" }
}
}
\`\`\`
`,
        parameters: z.object({
            id: z.string(),
            body: z.record(z.string(), z.any()),
        }),
        execute: async (args, { session }) => {
            const { id, body } = args;
            const res = await monitorRequest(session, `/monitor/${pathSegment(id, 'id')}`, {
                method: 'PATCH',
                body,
            });
            return asText(res);
        },
    });
    // DELETE /monitor/{id}
    server.addTool({
        name: 'firecrawl_monitor_delete',
        annotations: {
            title: 'Delete monitor',
            readOnlyHint: false,
            destructiveHint: true,
            openWorldHint: true,
        },
        description: `
Permanently delete a monitor and stop its schedule. This cannot be undone.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_delete", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
        parameters: z.object({ id: z.string() }),
        execute: async (args, { session, log }) => {
            const { id } = args;
            // Validate before logging so a rejected call leaves no "Deleting" entry.
            const path = `/monitor/${pathSegment(id, 'id')}`;
            log.info('Deleting monitor', { id });
            const res = await monitorRequest(session, path, { method: 'DELETE' });
            return asText(res);
        },
    });
    // POST /monitor/{id}/run: no request body.
    server.addTool({
        name: 'firecrawl_monitor_run',
        annotations: {
            title: 'Run monitor now',
            readOnlyHint: false,
            openWorldHint: true,
        },
        description: `
Trigger a monitor check immediately, outside its normal schedule. Returns the queued check.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_run", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
        parameters: z.object({ id: z.string() }),
        execute: async (args, { session }) => {
            const { id } = args;
            const res = await monitorRequest(session, `/monitor/${pathSegment(id, 'id')}/run`, {
                method: 'POST',
            });
            return asText(res);
        },
    });
    // GET /monitor/{id}/checks: optional limit/offset pagination.
    server.addTool({
        name: 'firecrawl_monitor_checks',
        annotations: {
            title: 'List monitor checks',
            readOnlyHint: true,
            openWorldHint: false,
        },
        description: `
List historical checks for a monitor.
**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10 } }
\`\`\`
`,
        parameters: z.object({
            id: z.string(),
            limit: z.number().int().positive().optional(),
            offset: z.number().int().nonnegative().optional(),
        }),
        execute: async (args, { session }) => {
            const { id, limit, offset } = args;
            const res = await monitorRequest(session, `/monitor/${pathSegment(id, 'id')}/checks`, {
                query: { limit, offset },
            });
            return asText(res);
        },
    });
    // GET /monitor/{id}/checks/{checkId}: paginated with limit/skip.
    server.addTool({
        name: 'firecrawl_monitor_check',
        annotations: {
            title: 'Get monitor check',
            readOnlyHint: true,
            openWorldHint: false,
        },
        description: `
Get a single check with page-level diff results. Filter \`pageStatus\` to surface only the pages that changed (or were new, removed, etc.).
Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:
- **Markdown mode (default).** \`diff.text\` is the unified markdown diff; \`diff.json\` is a parse-diff AST (\`{ files: [...] }\`). No \`snapshot\`.
- **JSON mode** (\`changeTracking\` with \`modes: ["json"]\`). \`diff.json\` is a per-field map keyed by JSON path into the extraction, e.g. \`plans[0].price\`, with each value being \`{ previous, current }\`. \`snapshot.json\` is the full current extraction. No \`diff.text\`.
- **Mixed mode** (\`modes: ["json", "git-diff"]\`). Both \`diff.text\` (markdown sidecar) AND \`diff.json\` (per-field map) are present, plus \`snapshot.json\`.
**Example JSON-mode response \`pages[]\` entry:**
\`\`\`json
{
"url": "https://example.com/pricing",
"status": "changed",
"diff": {
"json": {
"plans[0].price": { "previous": "$19/mo", "current": "$24/mo" },
"plans[1].features[2]": { "previous": "10 GB storage", "current": "25 GB storage" }
}
},
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } }
}
\`\`\`
When summarizing a check for the user, prefer \`diff.json\` paths (e.g. "plans[0].price changed from $19/mo to $24/mo") over re-printing the markdown diff — it's more concise and grounded in the schema fields they asked for.
The endpoint paginates via a top-level \`next\` URL; this tool returns one page at a time. Increase \`limit\` (max 100) to fetch fewer pages.
**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_check",
"arguments": {
"id": "mon_abc123",
"checkId": "chk_xyz",
"pageStatus": "changed"
}
}
\`\`\`
`,
        parameters: z.object({
            id: z.string(),
            checkId: z.string(),
            limit: z.number().int().positive().optional(),
            skip: z.number().int().nonnegative().optional(),
            pageStatus: pageStatusSchema.optional(),
        }),
        execute: async (args, { session }) => {
            const { id, checkId, limit, skip, pageStatus } = args;
            const path = `/monitor/${pathSegment(id, 'id')}/checks/${pathSegment(checkId, 'checkId')}`;
            const res = await monitorRequest(session, path, {
                // The tool's `pageStatus` argument is sent as the `status` query parameter.
                query: { limit, skip, status: pageStatus },
            });
            return asText(res);
        },
    });
}