UNPKG

@pinkpixel/prysm-mcp

Version:

MCP server for the Prysm web scraper — enabling AI assistants to scrape web content.

75 lines (66 loc) 2.62 kB
#!/usr/bin/env node
/**
 * Prysm Scraper CLI
 *
 * User-friendly command-line entry point for the Prysm scraper.
 * Parses/validates command-line arguments, then hands them to the
 * main scraper module, supporting the three export shapes that
 * module may use (function, `{ main }`, or self-running on require).
 *
 * Exit codes: 0 on success or explicit --help, 1 on invalid URL or
 * scraper load/run failure.
 */

const path = require('path');

// Command-line arguments (skip the 'node' binary and this script's path).
const args = process.argv.slice(2);

/**
 * Minimal logger that writes to stderr, keeping stdout free for
 * any scraper output.
 * @param {string} message - line to emit (newline appended)
 */
function log(message) {
  process.stderr.write(`${message}\n`);
}

/**
 * Print usage information and terminate the process.
 * @param {number} [exitCode=0] - status to exit with; pass a non-zero
 *   value when help is shown because of a usage error.
 */
function showHelp(exitCode = 0) {
  log("Prysm - Structure-Aware Web Scraper");
  log("\nUsage: npx prysm-scrape [url] [options]");
  log(" or: npm run scrape -- [url] [options]");
  log("\nOptions:");
  log(" --pages <number> Number of links to follow from initial URL (default: 1)");
  log(" --images Download images from the page");
  log(" --output <path> Custom output path for results (default: ~/prysm/output)");
  log(" --image-output <path> Custom output path for images (default: ~/prysm/output/images)");
  log(" --help Show this help message");
  log("\nExamples:");
  log(' npm run scrape -- "https://example.com"');
  log(' npm run scrape -- "https://example.com" --pages 5');
  log(' npm run scrape -- "https://example.com" --images');
  log(' npm run scrape -- "https://example.com" --output "/custom/path"');
  log(' npx prysm-scrape "https://example.com" --image-output "/custom/images"');
  process.exit(exitCode);
}

// Help requested, or no arguments at all: show usage and exit 0.
if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
  showHelp();
}

// The first argument must look like an absolute HTTP(S) URL.
const firstArg = args[0];
if (!firstArg.startsWith('http://') && !firstArg.startsWith('https://')) {
  log('Error: The first argument must be a valid URL starting with http:// or https://');
  // BUG FIX: this is an error path, so exit non-zero (the original
  // reached showHelp's unconditional `process.exit(0)`).
  showHelp(1);
}

// Load and run the main scraper with all arguments.
try {
  // BUG FIX: the original also had a top-level
  // `const { mainScraper } = require('../main_scraper');` which both
  // duplicated this require (loading the module eagerly, before help/URL
  // validation) and collided with the `mainScraper` binding below.
  // Require exactly once, lazily, here.
  const mainScraperPath = path.resolve(__dirname, '../main_scraper.js');
  const mainScraper = require(mainScraperPath);

  if (typeof mainScraper === 'function') {
    // Module exports a function directly.
    mainScraper(args);
  } else if (typeof mainScraper.main === 'function') {
    // Module exports an object with a main() entry point.
    mainScraper.main(args);
  }
  // Otherwise the module is assumed to parse process.argv itself as a
  // side effect of being required — nothing more to do.
} catch (error) {
  log(`Error: ${error.message}`);
  process.exit(1);
}