mcp-basic-web-crawler

A Model Context Protocol (MCP) server providing ethical web crawling and search capabilities

#!/usr/bin/env node
/**
 * MCP Basic Web Crawler Server - Consolidated Single File
 *
 * A Model Context Protocol server providing basic web crawling and search capabilities.
 * This consolidated version contains all functionality in a single file for easy deployment.
 */
import { Transport } from '@modelcontextprotocol/sdk/shared/transport';
import { z } from 'zod';
export interface Context {
    error(message: string): Promise<void>;
}
export interface SearchResult {
    title: string;
    link: string;
    snippet: string;
    position: number;
}
export interface MemoryStats {
    totalMemory: number;
    freeMemory: number;
    usedMemory: number;
    usagePercentage: number;
}
export interface CrawlerConfig {
    /** Maximum requests per minute for search operations */
    searchRateLimit: number;
    /** Maximum requests per minute for content fetching */
    fetchRateLimit: number;
    /** Maximum content size to process in memory (bytes) */
    maxInMemorySize: number;
    /** Request timeout in milliseconds */
    requestTimeout: number;
    /** Maximum number of redirects to follow */
    maxRedirects: number;
    /** Maximum content length to return (characters) */
    maxContentLength: number;
    /** User agent string for requests */
    userAgent: string;
    /** Whether to respect robots.txt */
    respectRobots: boolean;
    /** Delay between batch processing (milliseconds) */
    batchDelay: number;
}
export interface RobotsTxtRules {
    allowed: boolean;
    crawlDelay?: number;
}
export declare const DEFAULT_CONFIG: CrawlerConfig;
export declare const DuckDuckGoWebSearchArgsSchema: z.ZodObject<{
    query: z.ZodString;
    maxResults: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
}, "strip", z.ZodTypeAny, {
    query: string;
    maxResults: number;
}, {
    query: string;
    maxResults?: number | undefined;
}>;
export declare const UrlContentExtractorArgsSchema: z.ZodObject<{
    url: z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>;
}, "strip", z.ZodTypeAny, {
    url: string | string[];
}, {
    url: string | string[];
}>;
export type DuckDuckGoWebSearchArgs = z.infer<typeof DuckDuckGoWebSearchArgsSchema>;
export type UrlContentExtractorArgs = z.infer<typeof UrlContentExtractorArgsSchema>;
export declare enum LogLevel {
    ERROR = 0,
    WARN = 1,
    INFO = 2,
    DEBUG = 3
}
export declare class Logger {
    private level;
    private prefix;
    constructor(prefix?: string, level?: LogLevel);
    private log;
    error(message: string, ...args: any[]): void;
    warn(message: string, ...args: any[]): void;
    info(message: string, ...args: any[]): void;
    debug(message: string, ...args: any[]): void;
    setLevel(level: LogLevel): void;
    child(suffix: string): Logger;
}
export declare class RateLimiter {
    private requestsPerMinute;
    private requests;
    constructor(requestsPerMinute?: number);
    /**
     * Acquire permission to make a request, waiting if necessary
     */
    acquire(): Promise<void>;
    /**
     * Get current rate limit status
     */
    getStatus(): {
        current: number;
        limit: number;
        resetTime: Date | null;
    };
    /**
     * Update the rate limit
     */
    updateLimit(newLimit: number): void;
}
export declare class DuckDuckGoSearcher {
    private static readonly BASE_URL;
    private rateLimiter;
    private logger;
    private config;
    constructor(config: CrawlerConfig, logger: Logger);
    /**
     * Format search results for LLM consumption
     */
    formatResultsForLLM(results: SearchResult[]): string;
    /**
     * Perform a search query with rate limiting and error handling
     */
    search(query: string, ctx: Context, maxResults?: number): Promise<SearchResult[]>;
}
export declare class WebContentFetcher {
    private rateLimiter;
    private tempFiles;
    private logger;
    private config;
    constructor(config: CrawlerConfig, logger: Logger);
    private cleanup;
    private getMemoryStats;
    private processHtml;
    fetchAndParse(urlStr: string, ctx: Context): Promise<string>;
    fetchMultipleUrls(urls: string[], ctx: Context): Promise<Record<string, string>>;
}
export declare class WebCrawlerServer {
    private server;
    private logger;
    private searcher;
    private fetcher;
    constructor(config: CrawlerConfig, logger?: Logger);
    /**
     * Start the server with the provided transport
     */
    startServer(transport: Transport): void;
    /**
     * Set up request handlers for the MCP server
     */
    private setupRequestHandlers;
}
//# sourceMappingURL=index.d.ts.map
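
The declarations above suggest the intended startup path: construct a WebCrawlerServer from a CrawlerConfig and an optional Logger, then hand it a Transport. A minimal sketch of that wiring in TypeScript, assuming the package is imported under its published name mcp-basic-web-crawler and that the MCP SDK's StdioServerTransport is the chosen transport (neither detail is confirmed by the declaration file itself; the entry-file name is hypothetical):

// run-server.ts (hypothetical entry point)
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { WebCrawlerServer, DEFAULT_CONFIG, Logger, LogLevel } from 'mcp-basic-web-crawler';

// DEFAULT_CONFIG supplies rate limits, timeouts, and the user agent;
// individual fields can be overridden via a spread.
const logger = new Logger('web-crawler', LogLevel.INFO);
const server = new WebCrawlerServer({ ...DEFAULT_CONFIG, respectRobots: true }, logger);

// startServer accepts any MCP Transport; stdio is typical for locally spawned MCP servers.
server.startServer(new StdioServerTransport());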
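
Because the tool argument schemas are exported as Zod objects, callers can validate tool inputs the same way the server presumably does internally. A brief sketch using standard Zod parsing (the exact default for maxResults is defined inside the package and not shown here):

import { DuckDuckGoWebSearchArgsSchema, UrlContentExtractorArgsSchema } from 'mcp-basic-web-crawler';

// query is required; maxResults is optional and filled with the package's default after parsing.
const searchArgs = DuckDuckGoWebSearchArgsSchema.parse({ query: 'model context protocol' });

// url accepts either a single URL string or an array of URL strings.
const fetchArgs = UrlContentExtractorArgsSchema.parse({
    url: ['https://example.com', 'https://example.org'],
});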