UNPKG

@dendaio/n8n-nodes-collection

Version:

🚀 Comprehensive n8n node collection for financial analysis and automation. Features 55+ technical indicators (RSI, MACD, Bollinger Bands), 32+ candlestick patterns (Doji, Hammer, Engulfing), derivative statistics (Open Interest, Funding Rate, Long/Short …)

103 lines • 4.56 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ReadabilityContentExtractor = void 0;
const n8n_workflow_1 = require("n8n-workflow");
const readability_1 = require("@mozilla/readability");
const jsdom_1 = require("jsdom");

/**
 * Copies the requested parts of a parsed Readability article into a plain
 * JSON object. Only truthy article fields are copied, so absent values never
 * show up as `null`/`undefined` keys in the node output.
 *
 * @param {object} article - Non-null result of `Readability#parse()`.
 * @param {object} options - Output toggles read from the node parameters.
 * @param {boolean} options.includeFullContent - Copy the cleaned HTML (`content`).
 * @param {boolean} options.includeTextContent - Copy the trimmed plain text and its length.
 * @param {boolean} options.includeMetadata - Copy title, excerpt, site name, byline,
 *   language, published time and text direction.
 * @returns {object} The JSON payload for one output item.
 */
function buildArticleResult(article, { includeFullContent, includeTextContent, includeMetadata }) {
    const result = {};
    if (includeTextContent && article.textContent) {
        result.textContent = article.textContent.trim();
        // Prefer the length reported by Readability; fall back to the raw text length.
        result.length = article.length || article.textContent.length;
    }
    if (includeFullContent && article.content) {
        result.content = article.content;
    }
    if (includeMetadata) {
        if (article.title) {
            result.title = article.title;
        }
        if (article.excerpt) {
            result.excerpt = article.excerpt;
        }
        if (article.siteName) {
            result.siteName = article.siteName;
        }
        if (article.byline) {
            result.byline = article.byline;
        }
        if (article.lang) {
            result.language = article.lang;
        }
        if (article.publishedTime) {
            result.publishedTime = article.publishedTime;
        }
        if (article.dir) {
            result.textDirection = article.dir;
        }
    }
    return result;
}

/**
 * n8n node that parses an HTML string with jsdom and runs Mozilla's
 * Readability over the resulting document, emitting one output item per
 * input item.
 */
class ReadabilityContentExtractor {
    constructor() {
        this.description = {
            displayName: 'Readability Content Extractor',
            name: 'readabilityContentExtractor',
            icon: 'file:ReadabilityContentExtractor.svg',
            group: ['transform'],
            version: 1,
            description: 'Extracts clean, readable content from HTML using Mozilla\'s Readability algorithm. Removes clutter like ads, navigation, and sidebars.',
            defaults: {
                name: 'Readability Content Extractor',
            },
            inputs: ['main'],
            outputs: ['main'],
            properties: [
                {
                    displayName: 'HTML Code',
                    name: 'html',
                    type: 'string',
                    required: true,
                    default: '',
                    placeholder: '',
                    description: 'The HTML content to extract readable text from. Use the "HTTP Request" node to fetch webpage HTML.',
                },
                {
                    displayName: 'Include Full Content',
                    name: 'includeFullContent',
                    type: 'boolean',
                    default: false,
                    description: 'Whether to include the full cleaned HTML content in the output',
                },
                {
                    displayName: 'Include Text Content',
                    name: 'includeTextContent',
                    type: 'boolean',
                    default: true,
                    description: 'Whether to include the plain text content in the output',
                },
                {
                    displayName: 'Include Metadata',
                    name: 'includeMetadata',
                    type: 'boolean',
                    default: true,
                    description: 'Whether to include article metadata like title, excerpt, author, etc',
                },
            ],
        };
    }

    /**
     * Processes every input item: reads the node parameters for that item,
     * parses the HTML, extracts the article and pushes the selected fields.
     *
     * When the node's "Continue On Fail" setting is active, a failing item
     * yields `{ error: <message> }` instead of aborting the whole execution.
     *
     * @returns {Promise<Array<Array<object>>>} A single output branch with one
     *   item per input item.
     * @throws {NodeOperationError} If the HTML is empty or Readability cannot
     *   extract an article (and "Continue On Fail" is off). The error carries
     *   the index of the offending item.
     */
    async execute() {
        const items = this.getInputData();
        const output = [];
        for (let i = 0; i < items.length; i++) {
            let dom;
            try {
                const html = this.getNodeParameter('html', i);
                const includeFullContent = this.getNodeParameter('includeFullContent', i, false);
                const includeTextContent = this.getNodeParameter('includeTextContent', i, true);
                const includeMetadata = this.getNodeParameter('includeMetadata', i, true);
                if (!html || html.trim() === '') {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'HTML input cannot be empty.', { itemIndex: i });
                }
                dom = new jsdom_1.JSDOM(html);
                const article = new readability_1.Readability(dom.window.document).parse();
                if (!article) {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'Could not extract readable content from the provided HTML.', { itemIndex: i });
                }
                output.push({
                    json: buildArticleResult(article, { includeFullContent, includeTextContent, includeMetadata }),
                    pairedItem: { item: i },
                });
            }
            catch (error) {
                if (this.continueOnFail()) {
                    output.push({ json: { error: error.message }, pairedItem: { item: i } });
                    continue;
                }
                throw error;
            }
            finally {
                // Release the jsdom window once the strings have been copied out,
                // so per-item DOMs do not pile up in a long-running process.
                if (dom) {
                    dom.window.close();
                }
            }
        }
        // Items are already `{ json }`-shaped, so they are returned directly
        // rather than through `helpers.returnJsonArray`, which expects raw JSON objects.
        return [output];
    }
}
exports.ReadabilityContentExtractor = ReadabilityContentExtractor;
//# sourceMappingURL=ReadabilityContentExtractor.node.js.map