@dendaio/n8n-nodes-collection
Version:
🚀 Comprehensive n8n node collection for financial analysis and automation. Features 55+ technical indicators (RSI, MACD, Bollinger Bands), 32+ candlestick patterns (Doji, Hammer, Engulfing), derivative statistics (Open Interest, Funding Rate, Long/Short
103 lines • 4.56 kB
JavaScript
// NOTE(review): lone empty statement — possibly the remnant of a stripped
// "use strict" directive; confirm against the published file.
;
// Set the `__esModule` marker property on the CommonJS exports object.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the class is assigned to it after its definition.
exports.ReadabilityContentExtractor = void 0;
// Supplies NodeOperationError, which execute() throws for invalid input or failed extraction.
const n8n_workflow_1 = require("n8n-workflow");
// Supplies the Readability class whose parse() result execute() maps into the output item.
const readability_1 = require("@mozilla/readability");
// Supplies JSDOM, used to turn the HTML string into a document for Readability.
const jsdom_1 = require("jsdom");
/**
 * n8n node that feeds an HTML string through Mozilla's Readability parser and
 * emits the extracted article as plain text, cleaned HTML and/or metadata,
 * depending on the node's boolean options.
 */
class ReadabilityContentExtractor {
    constructor() {
        // Static node description: display settings plus the four user-facing parameters.
        this.description = {
            displayName: 'Readability Content Extractor',
            name: 'readabilityContentExtractor',
            icon: 'file:ReadabilityContentExtractor.svg',
            group: ['transform'],
            version: 1,
            description: 'Extracts clean, readable content from HTML using Mozilla\'s Readability algorithm. Removes clutter like ads, navigation, and sidebars.',
            defaults: {
                name: 'Readability Content Extractor',
            },
            inputs: ['main'],
            outputs: ['main'],
            properties: [
                {
                    displayName: 'HTML Code',
                    name: 'html',
                    type: 'string',
                    required: true,
                    default: '',
                    placeholder: '',
                    description: 'The HTML content to extract readable text from. Use the "HTTP Request" node to fetch webpage HTML.',
                },
                {
                    displayName: 'Include Full Content',
                    name: 'includeFullContent',
                    type: 'boolean',
                    default: false,
                    description: 'Whether to include the full cleaned HTML content in the output',
                },
                {
                    displayName: 'Include Text Content',
                    name: 'includeTextContent',
                    type: 'boolean',
                    default: true,
                    description: 'Whether to include the plain text content in the output',
                },
                {
                    displayName: 'Include Metadata',
                    name: 'includeMetadata',
                    type: 'boolean',
                    default: true,
                    description: 'Whether to include article metadata like title, excerpt, author, etc',
                }
            ],
        };
    }
    /**
     * Runs the extraction once per input item.
     *
     * Must be invoked with `this` bound to the execution context, because it
     * calls `getInputData`, `getNodeParameter`, `getNode` and `continueOnFail`
     * on `this`.
     *
     * @returns {Promise<Array<Array<{json: object, pairedItem: {item: number}}>>>}
     *   A single output branch holding one item per input item.
     * @throws {NodeOperationError} When the HTML parameter is empty or not a
     *   string, or when Readability returns no article — unless
     *   `continueOnFail()` is true, in which case the failing item is emitted
     *   as `{ error: <message> }` instead.
     */
    async execute() {
        const items = this.getInputData();
        const output = [];
        for (let i = 0; i < items.length; i++) {
            // Declared outside `try` so `finally` can release the window.
            let doc;
            try {
                const html = this.getNodeParameter('html', i);
                const includeFullContent = this.getNodeParameter('includeFullContent', i, false);
                const includeTextContent = this.getNodeParameter('includeTextContent', i, true);
                const includeMetadata = this.getNodeParameter('includeMetadata', i, true);
                if (!html || (typeof html === 'string' && html.trim() === '')) {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'HTML input cannot be empty.', { itemIndex: i });
                }
                // The parameter value may not be a string (e.g. a number or object);
                // fail with a clear node error rather than a TypeError from `.trim()`.
                if (typeof html !== 'string') {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'HTML input must be a string.', { itemIndex: i });
                }
                doc = new jsdom_1.JSDOM(html);
                const article = new readability_1.Readability(doc.window.document).parse();
                if (!article) {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'Could not extract readable content from the provided HTML.', { itemIndex: i });
                }
                const result = {};
                if (includeTextContent && article.textContent) {
                    result.textContent = article.textContent.trim();
                    result.length = article.length || article.textContent.length;
                }
                if (includeFullContent && article.content) {
                    result.content = article.content;
                }
                if (includeMetadata) {
                    // Only copy fields Readability actually populated.
                    if (article.title)
                        result.title = article.title;
                    if (article.excerpt)
                        result.excerpt = article.excerpt;
                    if (article.siteName)
                        result.siteName = article.siteName;
                    if (article.byline)
                        result.byline = article.byline;
                    if (article.lang)
                        result.language = article.lang;
                    if (article.publishedTime)
                        result.publishedTime = article.publishedTime;
                    if (article.dir)
                        result.textDirection = article.dir;
                }
                output.push({
                    json: result,
                    pairedItem: { item: i },
                });
            }
            catch (error) {
                if (this.continueOnFail()) {
                    const message = error instanceof Error ? error.message : String(error);
                    output.push({
                        json: { error: message },
                        pairedItem: { item: i },
                    });
                    continue;
                }
                throw error;
            }
            finally {
                // Close the jsdom window on success and on failure so one is not leaked per item.
                if (doc) {
                    doc.window.close();
                }
            }
        }
        // Items are already `{ json }`-wrapped; return them as-is instead of
        // passing them through a helper that may wrap them a second time.
        return [output];
    }
}
// Publish the node class on the CommonJS exports object.
exports.ReadabilityContentExtractor = ReadabilityContentExtractor;
//# sourceMappingURL=ReadabilityContentExtractor.node.js.map