UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

31 lines 1.11 kB
import { Readability } from '@mozilla/readability';
import { JSDOM, VirtualConsole } from 'jsdom';
import { request } from 'gaxios';
import { Logger } from './Logger.js';
import { getChromeOnWindowsHeaders } from './BrowserRequestHeaders.js';
import { convertHtmlToText } from './StringUtilities.js';

/**
 * Downloads the page at `url` and returns its main article content as text.
 *
 * Steps: the page is requested as text using the headers produced by
 * `getChromeOnWindowsHeaders`, loaded into a JSDOM document, passed through
 * Readability to isolate the article, and the resulting article HTML is
 * handed to `convertHtmlToText`.
 *
 * @param {string} url - Absolute URL of the document to fetch. It is parsed
 *   with `new URL(url)`, so an invalid URL throws before any request is made.
 * @returns {Promise<string>} The result of `convertHtmlToText` applied to the
 *   extracted article HTML, or to an empty string when Readability yields no
 *   article (or an article with empty content).
 */
export async function fetchDocumentText(url) {
    const logger = new Logger();

    logger.start(`Fetch ${url}`);

    // The page's own origin is sent as both the origin and the referrer.
    const { origin } = new URL(url);
    const requestHeaders = getChromeOnWindowsHeaders({
        origin,
        referrer: `${origin}/`,
    });

    const httpResponse = await request({
        url,
        responseType: 'text',
        headers: requestHeaders,
    });

    logger.start('Parse document body');

    // A dedicated VirtualConsole is supplied instead of jsdom's default one.
    // NOTE(review): presumably this keeps page console output away from the
    // process console, since no listeners are attached - confirm against jsdom docs.
    const dom = new JSDOM(httpResponse.data, {
        url,
        virtualConsole: new VirtualConsole(),
    });

    const parsedArticle = new Readability(dom.window.document).parse();

    // Fall back to an empty string when no article (or no content) was extracted.
    const articleHtml = parsedArticle?.content || '';
    const documentText = await convertHtmlToText(articleHtml);

    logger.end();

    return documentText;
}
//# sourceMappingURL=WebReader.js.map