UNPKG

defuddle

Version:

Extract article content and metadata from web pages.

140 lines 5.42 kB
#!/usr/bin/env node
"use strict";
// ---------------------------------------------------------------------------
// Module-interop helpers (compiler-emitted style). They are only used below to
// wrap the lazily required `jsdom` module in a namespace-like object.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
const commander_1 = require("commander");
const node_1 = require("./node");
const promises_1 = require("fs/promises");
const path_1 = require("path");

// ANSI color helpers (avoids chalk dependency which is ESM-only).
// Colors are emitted only when stdout is a TTY; otherwise text passes through unchanged.
const useColor = process.stdout.isTTY ?? false;
const ansi = {
    red: (s) => useColor ? `\x1b[31m${s}\x1b[39m` : s,
    green: (s) => useColor ? `\x1b[32m${s}\x1b[39m` : s,
};

// Read version from package.json
const version = require('../package.json').version;

/**
 * Decide whether the CLI `source` argument is a remote page rather than a file path.
 *
 * This single predicate drives both how the DOM is loaded and whether the source is
 * forwarded to Defuddle as the page URL, so the two decisions cannot disagree
 * (a local file named e.g. `http-notes.html` is treated as a file in both places).
 *
 * @param {string} source - Raw `<source>` argument from the command line.
 * @returns {boolean} True when `source` starts with `http://` or `https://`.
 */
function isHttpUrl(source) {
    return source.startsWith('http://') || source.startsWith('https://');
}

/**
 * Render one extracted property as text for the `--property` option.
 *
 * Arrays and plain objects are emitted as pretty-printed JSON because their default
 * `toString()` produces unusable text such as "[object Object]". Every other value
 * keeps its `toString()` form; null, undefined and empty results become ''.
 *
 * @param {*} value - Property value read from the parse result.
 * @returns {string} Text to print or write to the output file.
 */
function formatPropertyValue(value) {
    const isPlainObject = Object.prototype.toString.call(value) === '[object Object]';
    if (Array.isArray(value) || isPlainObject) {
        return JSON.stringify(value, null, 2);
    }
    return value?.toString() || '';
}

/**
 * Print an error in the CLI's standard format and terminate with exit code 1.
 *
 * @param {unknown} error - Value caught from a failed operation.
 */
function failWith(error) {
    console.error(ansi.red('Error:'), error instanceof Error ? error.message : 'Unknown error occurred');
    process.exit(1);
}

const program = new commander_1.Command();
program
    .name('defuddle')
    .description('Extract article content from web pages')
    .version(version);
program
    .command('parse')
    .description('Parse HTML content from a file or URL')
    .argument('<source>', 'HTML file path or URL to parse')
    .option('-o, --output <file>', 'Output file path (default: stdout)')
    .option('-m, --markdown', 'Convert content to markdown format')
    .option('--md', 'Alias for --markdown')
    .option('-j, --json', 'Output as JSON with metadata and content')
    .option('-p, --property <name>', 'Extract a specific property (e.g., title, description, domain)')
    .option('--debug', 'Enable debug mode')
    /**
     * `parse` command handler.
     *
     * Loads the document (from a URL or a file resolved against the current working
     * directory), runs Defuddle on it, formats the result according to the options
     * (`--property` takes precedence over `--json`, which takes precedence over the
     * raw content), and writes it to `--output` or stdout. Any failure is reported
     * on stderr and the process exits with code 1.
     *
     * @param {string} source - HTML file path or URL.
     * @param {object} options - Parsed commander options for the command.
     */
    .action(async (source, options) => {
    try {
        // Handle --md alias
        if (options.md) {
            options.markdown = true;
        }
        // jsdom is loaded lazily so a missing install yields a clear hint instead of a stack trace.
        let JSDOM;
        try {
            JSDOM = (await Promise.resolve().then(() => __importStar(require('jsdom')))).JSDOM;
        }
        catch {
            console.error(ansi.red('Error: jsdom is required for the CLI. Install it with: npm install jsdom'));
            process.exit(1);
        }
        // Determine once whether source is a URL or file path.
        const sourceIsUrl = isHttpUrl(source);
        let dom;
        if (sourceIsUrl) {
            dom = await JSDOM.fromURL(source);
        }
        else {
            const filePath = (0, path_1.resolve)(process.cwd(), source);
            dom = await JSDOM.fromFile(filePath);
        }
        // Only a real URL is forwarded as the page URL; file paths pass `undefined`.
        const result = await (0, node_1.Defuddle)(dom, sourceIsUrl ? source : undefined, {
            debug: options.debug,
            markdown: options.markdown
        });
        // Format output
        let output;
        if (options.property) {
            const property = options.property;
            if (property in result) {
                output = formatPropertyValue(result[property]);
            }
            else {
                console.error(ansi.red(`Error: Property "${property}" not found in response`));
                process.exit(1);
            }
        }
        else if (options.json) {
            output = JSON.stringify({
                content: result.content,
                title: result.title,
                description: result.description,
                domain: result.domain,
                favicon: result.favicon,
                image: result.image,
                metaTags: result.metaTags,
                parseTime: result.parseTime,
                published: result.published,
                author: result.author,
                site: result.site,
                schemaOrgData: result.schemaOrgData,
                wordCount: result.wordCount,
                // `variables` is included only when the result carries it.
                ...(result.variables ? { variables: result.variables } : {}),
            }, null, 2);
        }
        else {
            output = result.content;
        }
        // Handle output
        if (options.output) {
            const outputPath = (0, path_1.resolve)(process.cwd(), options.output);
            await (0, promises_1.writeFile)(outputPath, output, 'utf-8');
            console.log(ansi.green(`Output written to ${options.output}`));
        }
        else {
            console.log(output);
        }
    }
    catch (error) {
        failWith(error);
    }
});

// The action handler is async, so parse with parseAsync() and surface any
// rejection that escapes the handler instead of leaving it unhandled.
program.parseAsync().catch(failWith);
//# sourceMappingURL=cli.js.map