UNPKG

mycoder-agent

Version:

Agent module for mycoder - an AI-powered software development assistant

90 lines 3.32 kB
import { Readability } from '@mozilla/readability'; import { JSDOM } from 'jsdom'; const OUTPUT_LIMIT = 11 * 1024; // 10KB limit /** * Returns the raw HTML content of the page without any processing */ async function getNoneProcessedDOM(page) { return await page.content(); } /** * Processes the page using Mozilla's Readability to extract the main content * Falls back to simple processing if Readability fails */ async function getReadabilityProcessedDOM(page) { try { const html = await page.content(); const url = page.url(); const dom = new JSDOM(html, { url }); const reader = new Readability(dom.window.document); const article = reader.parse(); if (!article) { console.warn('Readability could not parse the page, falling back to simple mode'); return getSimpleProcessedDOM(page); } // Return a formatted version of the article return JSON.stringify({ url: url, title: article.title || '', content: article.content || '', textContent: article.textContent || '', excerpt: article.excerpt || '', byline: article.byline || '', dir: article.dir || '', siteName: article.siteName || '', length: article.length || 0, }, null, 2); } catch (error) { console.error('Error using Readability:', error); // Fallback to simple mode if Readability fails return getSimpleProcessedDOM(page); } } /** * Processes the page by removing invisible elements and non-visual tags */ async function getSimpleProcessedDOM(page) { const domContent = await page.evaluate(() => { const clone = document.documentElement; const elements = clone.querySelectorAll('*'); const elementsToRemove = []; elements.forEach((element) => { const computedStyle = window.getComputedStyle(element); const isVisible = computedStyle.display !== 'none' && computedStyle.visibility !== 'hidden' && computedStyle.opacity !== '0'; if (!isVisible) { elementsToRemove.push(element); } }); const nonVisualTags = clone.querySelectorAll('noscript, iframe, link[rel="stylesheet"], meta, svg, img, symbol, path, style, script'); nonVisualTags.forEach((element) => elementsToRemove.push(element)); elementsToRemove.forEach((element) => element.remove()); return clone.outerHTML; }); return domContent.replace(/\n/g, '').replace(/\s+/g, ' '); } /** * Gets the rendered DOM of a page with specified processing method */ export async function filterPageContent(page, pageFilter) { let result = ''; switch (pageFilter) { case 'none': result = await getNoneProcessedDOM(page); break; case 'readability': result = await getReadabilityProcessedDOM(page); break; case 'simple': default: result = await getSimpleProcessedDOM(page); break; } if (result.length > OUTPUT_LIMIT) { return result.slice(0, OUTPUT_LIMIT) + '...(truncated)'; } return result; } //# sourceMappingURL=filterPageContent.js.map