UNPKG

@measey/mycoder-agent

Version:

Agent module for mycoder - an AI-powered software development assistant

100 lines 3.94 kB
import { createProvider } from '../../../core/llm/provider.js';

/**
 * Maximum length (in string length units, i.e. UTF-16 code units) of the
 * content returned by `filterPageContent`: 11 * 1024 = 11264.
 * Longer results are cut to this length and then suffixed with a truncation
 * marker, so the returned string can exceed this value by the marker length.
 */
const OUTPUT_LIMIT = 11 * 1024;

/**
 * System prompt sent to the LLM when extracting the main content of a page.
 * The sentences are joined with single spaces into one line.
 */
const CONTENT_EXTRACTION_SYSTEM_PROMPT = [
  'You are an expert at extracting the main content from web pages.',
  'Given the HTML content of a webpage, extract only the main informative content.',
  'Format the extracted content as clean, well-structured markdown.',
  'Ignore headers, footers, navigation, sidebars, ads, and other non-content elements.',
  'Preserve the important headings, paragraphs, lists, and other content structures.',
  "Do not include any explanations or descriptions about what you're doing.",
  'Just return the extracted content as markdown.',
].join(' ');

/**
 * Returns the raw HTML content of the page without any processing.
 *
 * @param {object} page - Page object exposing an async `content()` method.
 * @returns {Promise<string>} Whatever `page.content()` resolves to.
 */
async function getRawDOM(page) {
  const content = await page.content();
  return content;
}

/**
 * Uses an LLM to extract the main content from a page and format it as
 * markdown.
 *
 * Falls back to the raw DOM (via `getRawDOM`) when:
 *  - `context.provider` or `context.model` is missing,
 *  - the LLM response has no text,
 *  - the provider call throws, or
 *  - reading the page HTML/URL throws.
 *
 * @param {object} page - Page object exposing async `content()` and `url()`.
 * @param {object} context - Tool context; `provider`, `model`, `apiKey`,
 *   `baseUrl` and `logger` (`warn`/`debug`/`error`) are read from it.
 * @returns {Promise<string>} Extracted markdown, or the raw HTML on fallback.
 */
async function getSmartMarkdownContent(page, context) {
  try {
    const html = await page.content();
    const url = page.url();

    // Use the configured LLM to extract the content
    const { provider, model, apiKey, baseUrl } = context;
    if (!provider || !model) {
      context.logger.warn('LLM provider or model not available, falling back to raw DOM');
      return getRawDOM(page);
    }

    try {
      // Create a provider instance using the provider abstraction
      const llmProvider = createProvider(provider, model, {
        apiKey,
        baseUrl,
      });

      // Generate text using the provider
      const response = await llmProvider.generateText({
        messages: [
          {
            role: 'system',
            content: CONTENT_EXTRACTION_SYSTEM_PROMPT,
          },
          {
            role: 'user',
            content: `URL: ${url}\n\nHTML content:\n${html}`,
          },
        ],
        temperature: 0.3,
        maxTokens: 4000,
      });

      // Extract the markdown content from the response
      const markdown = response.text;
      if (!markdown) {
        context.logger.warn('LLM returned empty content, falling back to raw DOM');
        return getRawDOM(page);
      }

      // Log token usage for monitoring (kept on one line so the message
      // contains no embedded line break)
      context.logger.debug(`Token usage for content extraction: ${JSON.stringify(response.tokenUsage)}`);
      return markdown;
    } catch (llmError) {
      context.logger.error('Error using LLM provider for content extraction:', llmError);
      return getRawDOM(page);
    }
  } catch (error) {
    context.logger.error('Error using LLM for content extraction:', error);
    // Fallback to raw mode if LLM processing fails
    return getRawDOM(page);
  }
}

/**
 * Gets the rendered DOM of a page with the specified processing method.
 *
 * @param {object} page - Page object exposing async `content()` and `url()`.
 * @param {string} contentFilter - `'smartMarkdown'` for LLM-based markdown
 *   extraction; `'raw'` or any other value returns the raw HTML.
 * @param {object} [context] - Tool context required by the `'smartMarkdown'`
 *   filter. When omitted, a warning is printed with `console.warn` and the
 *   raw HTML is used instead.
 * @returns {Promise<string>} The page content, cut to `OUTPUT_LIMIT` and
 *   suffixed with `'...(truncated)'` when it was longer than the limit.
 */
export async function filterPageContent(page, contentFilter, context) {
  let result = '';

  switch (contentFilter) {
    case 'smartMarkdown':
      if (!context) {
        console.warn('ToolContext required for smartMarkdown filter but not provided, falling back to raw mode');
        result = await getRawDOM(page);
      } else {
        result = await getSmartMarkdownContent(page, context);
      }
      break;
    case 'raw':
    default:
      result = await getRawDOM(page);
      break;
  }

  // Substitute an empty string for a falsy result before checking length
  const resultString = result || '';
  if (resultString.length > OUTPUT_LIMIT) {
    return resultString.slice(0, OUTPUT_LIMIT) + '...(truncated)';
  }
  return resultString;
}
//# sourceMappingURL=filterPageContent.js.map