UNPKG

article-parser

Version:

Extracts the main article from a given URL

28 lines (24 loc) 794 B
// utils/extractWithReadability

import { Readability } from '@mozilla/readability'
import { DOMParser } from 'linkedom'
import { isString } from 'bellajs'

/**
 * Parse an HTML string into a document object using linkedom's DOMParser.
 *
 * @param {string} html - HTML source text
 * @returns {object} the document produced by `parseFromString(html, 'text/html')`
 */
const toDocument = (html) => {
  const parser = new DOMParser()
  return parser.parseFromString(html, 'text/html')
}

/**
 * Run Readability over an HTML string and return the extracted article markup.
 *
 * Before Readability runs, a `<base>` element whose `href` is `inputUrl` is
 * appended to the document head.
 * NOTE(review): presumably this lets relative links be resolved against the
 * page URL - confirm against Readability's behavior.
 *
 * @param {string} html - HTML source text
 * @param {string} [inputUrl=''] - value written to the injected `<base href>`
 * @returns {string|null} the `content` of the Readability result, or `null`
 *   when `html` fails the `isString` check or the result has no truthy
 *   `textContent`
 */
export default (html, inputUrl = '') => {
  if (!isString(html)) {
    return null
  }

  const dom = toDocument(html)

  const baseTag = dom.createElement('base')
  baseTag.setAttribute('href', inputUrl)
  dom.head.appendChild(baseTag)

  // parse() may yield a nullish value; fall back to an empty object so the
  // property reads below are safe.
  const article = new Readability(dom).parse() ?? {}
  if (!article.textContent) {
    return null
  }
  return article.content
}

/**
 * Obtain the article title that Readability derives from an HTML string.
 *
 * @param {string} html - HTML source text
 * @returns {string|null} the value returned by Readability's title lookup, or
 *   `null` when `html` fails the `isString` check or the lookup result is falsy
 */
export function extractTitleWithReadability (html) {
  if (!isString(html)) {
    return null
  }

  const dom = toDocument(html)
  // NOTE(review): _getArticleTitle is an underscore-prefixed method on
  // Readability - verify it is still available when upgrading the dependency.
  const title = new Readability(dom)._getArticleTitle()
  return title || null
}