// @pinkpixel/prysm-mcp
// Version:
// MCP server for the Prysm web scraper - enabling AI assistants to scrape web content
// 75 lines (66 loc) • 2.62 kB
// JavaScript
/**
* Prysm Scraper CLI
*
* This script provides a user-friendly CLI for the Prysm scraper.
* It properly handles command line arguments and passes them to the main scraper.
*/
const path = require('path');
const { mainScraper } = require('../main_scraper');
// User-supplied CLI arguments: everything after the first two process.argv
// entries (the node executable and this script's path).
const [, , ...args] = process.argv;
/**
 * Write a single line to stderr.
 *
 * @param {*} message - Value to print; it is string-interpolated and
 *   terminated with a newline.
 */
function log(message) {
  const line = `${message}\n`;
  process.stderr.write(line);
}
/**
 * Print the usage text via log() and terminate the process.
 *
 * @param {number} [exitCode=0] - Exit status to terminate with. Defaults to 0
 *   for an explicit help request; pass a non-zero value when the usage text is
 *   shown in response to invalid input so callers can detect the failure.
 */
function showHelp(exitCode = 0) {
  log("Prysm - Structure-Aware Web Scraper");
  log("\nUsage: npx prysm-scrape [url] [options]");
  log(" or: npm run scrape -- [url] [options]");
  log("\nOptions:");
  log(" --pages <number> Number of links to follow from initial URL (default: 1)");
  log(" --images Download images from the page");
  log(" --output <path> Custom output path for results (default: ~/prysm/output)");
  log(" --image-output <path> Custom output path for images (default: ~/prysm/output/images)");
  log(" --help Show this help message");
  log("\nExamples:");
  log(' npm run scrape -- "https://example.com"');
  log(' npm run scrape -- "https://example.com" --pages 5');
  log(' npm run scrape -- "https://example.com" --images');
  log(' npm run scrape -- "https://example.com" --output "/custom/path"');
  log(' npx prysm-scrape "https://example.com" --image-output "/custom/images"');
  process.exit(exitCode);
}

// Show the usage text (exit status 0) when no arguments were given or help
// was requested explicitly.
if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
  showHelp();
}

// The first argument must be the target URL.
const firstArg = args[0];
if (!firstArg.startsWith('http://') && !firstArg.startsWith('https://')) {
  log('Error: The first argument must be a valid URL starting with http:// or https://');
  // Invalid input is a failure: exit non-zero so shells and scripts can tell
  // it apart from a successful run.
  showHelp(1);
}
/**
 * Report a scraper failure through log() and exit with status 1.
 *
 * @param {*} error - The thrown value or rejection reason. Error instances are
 *   reported by their message; anything else is stringified.
 */
function reportFailure(error) {
  const reason = error instanceof Error ? error.message : String(error);
  log(`Error: ${reason}`);
  process.exit(1);
}

// Execute the main scraper with all arguments
try {
  // Load the scraper module. The local is deliberately not called
  // `mainScraper`, so it does not shadow the binding of that name created by
  // the require at the top of the file.
  const scraperPath = path.resolve(__dirname, '../main_scraper.js');
  const scraperModule = require(scraperPath);

  let outcome;
  if (typeof scraperModule === 'function') {
    // The module exports a function directly.
    outcome = scraperModule(args);
  } else if (typeof scraperModule.main === 'function') {
    // The module exports an object with a `main` function.
    outcome = scraperModule.main(args);
  }
  // Otherwise nothing is called here: requiring the file is assumed to be
  // enough because the module is expected to parse process.argv itself.
  // NOTE(review): the export shape of main_scraper is not visible from this
  // file - confirm which of these branches actually applies.

  // If the entry point is asynchronous, a rejection would bypass the
  // surrounding try/catch; route it through the same failure reporting.
  // Wrapping a non-Promise value is harmless.
  Promise.resolve(outcome).catch(reportFailure);
} catch (error) {
  reportFailure(error);
}