UNPKG

@pinkpixel/prysm-mcp

Version:

MCP server for the Prysm web scraper - enabling AI assistants to scrape web content

43 lines (38 loc) 2.06 kB
/** * Default options for the scraper * All settings should use these defaults instead of hardcoding values */ const os = require('os'); const path = require('path'); // Get output directory from environment variable or use default const PRYSM_OUTPUT_DIR = process.env.PRYSM_OUTPUT_DIR; const PRYSM_IMAGE_OUTPUT_DIR = process.env.PRYSM_IMAGE_OUTPUT_DIR; // Default output paths const HOME_DIR = os.homedir(); const DEFAULT_OUTPUT_DIR = PRYSM_OUTPUT_DIR || path.join(HOME_DIR, 'prysm', 'output'); const DEFAULT_IMAGE_OUTPUT_DIR = PRYSM_IMAGE_OUTPUT_DIR || path.join(DEFAULT_OUTPUT_DIR, 'images'); const DEFAULT_OPTIONS = { // Core user parameters pages: 1, // Default to 1 page (number of links to follow from specified URL) images: false, // Whether to download images output: DEFAULT_OUTPUT_DIR, // Default output directory in user's home imageOutput: DEFAULT_IMAGE_OUTPUT_DIR, // Default image output directory (user changeable) // Hidden parameters (not exposed to users but needed for functionality) maxScrolls: 50, // Maximum scroll attempts scrollDelay: 2000, // Delay between scrolls in milliseconds headless: true, // Run in headless mode bypassCloudflare: true, // Enable Cloudflare bypass handlePagination: true, // Enable auto-pagination paginationStrategy: null, // Auto-detect (null) or force a specific strategy followLinks: false, // By default, don't follow links sameDomainOnly: true, // Only follow links on the same domain maxImages: 100, // Maximum number of images to extract per page minImageSize: 100, // Minimum size for images in pixels imageOutputDir: DEFAULT_IMAGE_OUTPUT_DIR, // For backward compatibility linkSelector: 'a', // Default selector for links to follow when pages > 1 concurrentImageDownloads: 5, // Number of concurrent image downloads waitForContentTimeout: 60000 // Timeout for waiting for content to load }; module.exports = { DEFAULT_OPTIONS };