UNPKG

doc-to-readable

Version:

Universal document-to-markdown and section splitter for HTML, URLs, and PDFs.

19 lines (18 loc) 630 B
// Article extraction utility using Readability
// Exports: extractArticleFromDom
import { Readability } from '@mozilla/readability';

/**
 * Extracts the main article content from a DOM using Readability
 * (browser or Node.js).
 *
 * Readability's `parse()` rewrites the document it is handed (it strips and
 * restructures nodes), so extraction runs on a deep clone whenever the input
 * exposes `cloneNode`. The caller's document is therefore left untouched.
 *
 * @param {Document} doc - Document to extract the article from.
 * @returns {string} Extracted article HTML, or an empty string when
 *   Readability is unavailable, finds no article, or yields empty content.
 */
export function extractArticleFromDom(doc) {
  // Guard kept from the original implementation; presumably protects against
  // bundler/interop setups where the named import resolves to undefined.
  if (!Readability) {
    console.debug('[extractArticleFromDom] Readability class not found');
    return '';
  }

  // Parse a copy so the caller's DOM is not mutated. Inputs without
  // cloneNode (including null/undefined) are passed through unchanged so
  // Readability's own argument validation still applies.
  const workingDoc =
    typeof doc?.cloneNode === 'function' ? doc.cloneNode(true) : doc;

  const article = new Readability(workingDoc).parse();
  return article?.content || '';
}