UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

304 lines 15.3 kB
/** * Smoke tests for visit_webpage — iter-57 * * Tests the upgraded visit_webpage implementation WITHOUT making live HTTP calls. * All fixtures use visitWebpageTestHooks.fetchHtml to inject canned HTML. * * Test fixtures: * 1. Plain HTML article — expects >500 chars, non-empty title * 2. Wikipedia-style page — expects infobox text extracted, links present * 3. JS-heavy minimal shell — falls back to regex, still returns something * 4. max_chars truncation — capped at 500 chars via parameter * 5. PDF URL (rejection) — graceful error message, not a throw to agent * 6. Invalid URL — validation rejects before fetch * 7. Link extraction — up to 30 links extracted from anchor tags * 8. extractTextFromHtml — direct test of the regex fallback path * * Run (after build): * node dist/src/benchmarks/gaia-visit-webpage.smoke.js * * Exit 0 on all pass, 1 on any failure. * * Refs: ADR-138, iter-57, #2156 */ // --------------------------------------------------------------------------- // Imports — dynamic to support both tsx (source) and compiled JS at runtime // --------------------------------------------------------------------------- // eslint-disable-next-line @typescript-eslint/no-explicit-any let _visitWebpage; // eslint-disable-next-line @typescript-eslint/no-explicit-any let _visitWebpageTestHooks; let _createVisitWebpageTool; // eslint-disable-next-line @typescript-eslint/no-unused-vars let _extractTextViaRegex; // --------------------------------------------------------------------------- // Fixture HTML // --------------------------------------------------------------------------- const PLAIN_ARTICLE_HTML = `<!DOCTYPE html> <html><head><title>The Life of Ada Lovelace</title></head> <body> <header><nav>Home | About | Contact</nav></header> <main> <article> <h1>Ada Lovelace: The First Programmer</h1> <p>Augusta Ada King, Countess of Lovelace, was an English mathematician and writer, chiefly known for her work on Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine.</p> <p>She was the first to recognise that the machine had applications beyond pure calculation, and to have published the first algorithm intended to be carried out by such a machine. As a result, she is often regarded as the first computer programmer.</p> <p>Born on 10 December 1815, Ada was the only legitimate child of the poet Lord Byron and his wife Lady Byron. All of Byron's other children were born out of wedlock to other women.</p> <p>She died on 27 November 1852, at the age of 36, from what was probably uterine cancer. She was buried at the Church of St. Mary Magdalene in Hucknall, Nottinghamshire.</p> </article> </main> <footer>Copyright 2026. All rights reserved.</footer> </body></html>`; const WIKIPEDIA_STYLE_HTML = `<!DOCTYPE html> <html><head><title>Charles Babbage - Wikipedia</title></head> <body> <div id="mw-content-text"> <table class="infobox"> <tr><th>Born</th><td>26 December 1791, London</td></tr> <tr><th>Died</th><td>18 October 1871 (aged 79), London</td></tr> <tr><th>Nationality</th><td>British</td></tr> </table> <p>Charles Babbage KH FRS was an English polymath. A mathematician, philosopher, inventor and mechanical engineer, Babbage originated the concept of a digital programmable computer.</p> <p>He invented the Difference Engine and conceived the Analytical Engine — precursors to the modern computer. His work on the Analytical Engine influenced Ada Lovelace, who wrote the first published algorithm for it.</p> </div> <div id="toc"><a href="#history">History</a><a href="#legacy">Legacy</a></div> </body></html>`; const JS_HEAVY_SHELL_HTML = `<!DOCTYPE html> <html><head><title>React App</title></head> <body> <div id="root"></div> <script>!function(e){function r(r){for(var n,i,s=r[0],l=r[1]}window.__WEBPACK_MODULES__={};</script> </body></html>`; const LINKS_HTML = `<!DOCTYPE html> <html><head><title>Links Test Page</title></head> <body> <p>See <a href="https://example.com/article1">Article One</a> for details.</p> <p>Also check <a href="https://example.com/article2">Article Two</a> and <a href="https://wikipedia.org/wiki/Test">Wikipedia Test</a>.</p> <p><a href="javascript:void(0)">Bad link</a> should be excluded.</p> <p><a href="mailto:test@example.com">Email</a> should be excluded.</p> <p>Another <a href="/relative/path">Relative link</a> should resolve.</p> <p>Content text here to ensure non-empty output for the test to pass.</p> </body></html>`; function assert(cond, msg) { if (!cond) throw new Error(`Assertion failed: ${msg}`); } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- const TESTS = [ // ------------------------------------------------------------------------- // Test 1: Plain HTML article — primary path // ------------------------------------------------------------------------- { label: 'T1: plain HTML article — >500 chars, non-empty title', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: PLAIN_ARTICLE_HTML, finalUrl: 'https://example.com/ada-lovelace', }); const result = await _visitWebpage('https://example.com/ada-lovelace', 50_000); assert(result.title === 'The Life of Ada Lovelace', `Expected title, got "${result.title}"`); assert(result.content.length > 500, `Content too short: ${result.content.length} chars`); assert(!result.truncated, 'Should not be truncated'); assert(result.sources_consulted.includes('https://example.com/ada-lovelace'), 'URL in sources'); }, }, // ------------------------------------------------------------------------- // Test 2: Wikipedia-style page — infobox text + links // ------------------------------------------------------------------------- { label: 'T2: Wikipedia-style page — infobox and body text extracted', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: WIKIPEDIA_STYLE_HTML, finalUrl: 'https://en.wikipedia.org/wiki/Charles_Babbage', }); const result = await _visitWebpage('https://en.wikipedia.org/wiki/Charles_Babbage', 50_000); assert(result.title === 'Charles Babbage - Wikipedia', `Title: ${result.title}`); assert(result.content.length > 200, `Content too short: ${result.content.length}`); // Content must contain some of the article text assert(result.content.includes('Babbage') || result.content.includes('Difference Engine'), `Expected article text, got: ${result.content.slice(0, 200)}`); }, }, // ------------------------------------------------------------------------- // Test 3: JS-heavy shell — falls back to regex, still returns something // ------------------------------------------------------------------------- { label: 'T3: JS-heavy shell — fallback extracts non-empty content', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: JS_HEAVY_SHELL_HTML, finalUrl: 'https://spa-app.example.com', }); const tool = _createVisitWebpageTool(); // For JS-heavy pages, Python bs4 will extract the title at minimum. // The tool should return a non-throwing response. let output; try { output = await tool.execute({ url: 'https://spa-app.example.com' }); // Either returns content or the "no readable text" message — both are OK assert(typeof output === 'string' && output.length > 0, 'Must return non-empty string'); assert(!output.toLowerCase().includes('error'), `Unexpected error in output: ${output.slice(0, 100)}`); } catch (err) { throw new Error(`T3 threw unexpectedly: ${String(err)}`); } }, }, // ------------------------------------------------------------------------- // Test 4: max_chars truncation // ------------------------------------------------------------------------- { label: 'T4: max_chars truncation — content capped at 500', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: PLAIN_ARTICLE_HTML, finalUrl: 'https://example.com/ada-lovelace', }); const result = await _visitWebpage('https://example.com/ada-lovelace', 500); assert(result.truncated, 'Expected truncated=true'); assert(result.chars_returned <= 500, `chars_returned=${result.chars_returned} > 500`); assert(result.content.length <= 500, `content.length=${result.content.length} > 500`); }, }, // ------------------------------------------------------------------------- // Test 5: PDF URL — graceful error, not throw to agent // ------------------------------------------------------------------------- { label: 'T5: PDF URL — graceful error message returned', async run() { // Simulate what fetchPage would throw for a PDF _visitWebpageTestHooks.fetchHtml = async (_url) => { throw new Error('visit_webpage: URL returns a PDF. Use file_read with the downloaded path instead. URL: https://example.com/doc.pdf'); }; const tool = _createVisitWebpageTool(); let output; try { // The tool.execute should propagate the throw — the agent loop catches it await tool.execute({ url: 'https://example.com/doc.pdf' }); throw new Error('T5: Expected tool to throw for PDF URL'); } catch (err) { const msg = String(err); assert(msg.includes('PDF') || msg.includes('visit_webpage'), `Error should mention PDF: ${msg}`); output = msg; } assert(output.length > 0, 'Should have error message'); }, }, // ------------------------------------------------------------------------- // Test 6: Invalid URL — validation rejects // ------------------------------------------------------------------------- { label: 'T6: invalid URL — validation error thrown', async run() { const tool = _createVisitWebpageTool(); try { await tool.execute({ url: 'not-a-url' }); throw new Error('T6: Expected validation error'); } catch (err) { const msg = String(err); assert(msg.includes('visit_webpage') && (msg.includes('URL') || msg.includes('url')), `Expected validation error, got: ${msg}`); } }, }, // ------------------------------------------------------------------------- // Test 7: Link extraction // ------------------------------------------------------------------------- { label: 'T7: link extraction — absolute and relative links extracted', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: LINKS_HTML, finalUrl: 'https://example.com/page', }); const result = await _visitWebpage('https://example.com/page', 50_000); assert(result.links.length >= 3, `Expected ≥3 links, got ${result.links.length}: ${JSON.stringify(result.links)}`); const hrefs = result.links.map((l) => l.href); assert(hrefs.some((h) => h.includes('article1')), 'Missing article1 link'); assert(hrefs.some((h) => h.includes('article2')), 'Missing article2 link'); assert(hrefs.some((h) => h.includes('wikipedia')), 'Missing wikipedia link'); // javascript: and mailto: links must be excluded assert(!hrefs.some((h) => h.startsWith('javascript:')), 'javascript: link must be excluded'); assert(!hrefs.some((h) => h.startsWith('mailto:')), 'mailto: link must be excluded'); // Link text must be non-empty for (const link of result.links) { assert(link.text.length > 0, `Link with empty text: ${JSON.stringify(link)}`); } }, }, // ------------------------------------------------------------------------- // Test 8: redirect tracking // ------------------------------------------------------------------------- { label: 'T8: redirect tracking — sources_consulted includes both URLs', async run() { _visitWebpageTestHooks.fetchHtml = async (_url) => ({ html: PLAIN_ARTICLE_HTML, finalUrl: 'https://example.com/redirected-page', }); const result = await _visitWebpage('https://example.com/original', 50_000); assert(result.sources_consulted.includes('https://example.com/original'), 'sources_consulted missing original URL'); assert(result.sources_consulted.includes('https://example.com/redirected-page'), 'sources_consulted missing final URL'); }, }, ]; // --------------------------------------------------------------------------- // Runner // --------------------------------------------------------------------------- async function main() { const mod = await import('./gaia-tools/index.js'); // eslint-disable-next-line @typescript-eslint/no-explicit-any const m = mod; _visitWebpage = m.visitWebpage; _visitWebpageTestHooks = m.visitWebpageTestHooks; _createVisitWebpageTool = m.createVisitWebpageTool; if (typeof _visitWebpage !== 'function') { console.error('ERROR: visitWebpage not exported from gaia-tools/index.\n' + 'Add `export { visitWebpage, visitWebpageTestHooks } from ./visit_webpage.js` to index.ts.'); process.exit(1); } if (!_visitWebpageTestHooks || typeof _visitWebpageTestHooks !== 'object') { console.error('ERROR: visitWebpageTestHooks not exported from gaia-tools/index.'); process.exit(1); } const PASS = '\x1b[32mPASS\x1b[0m'; const FAIL = '\x1b[31mFAIL\x1b[0m'; let failures = 0; console.log('\n=== gaia-visit-webpage smoke (iter-57) ===\n'); for (const tc of TESTS) { // Reset test hooks before each test _visitWebpageTestHooks.fetchHtml = undefined; try { await tc.run(); console.log(` ${PASS} ${tc.label}`); } catch (err) { console.log(` ${FAIL} ${tc.label}`); console.log(` ${String(err)}`); failures++; } } const total = TESTS.length; const passed = total - failures; console.log(`\n=== ${failures === 0 ? 'ALL PASSED' : `${failures} FAILED`} (${passed}/${total} cases) ===\n`); if (failures > 0) process.exit(1); } main().catch((err) => { console.error('Smoke crashed:', err); process.exit(2); }); export {}; //# sourceMappingURL=gaia-visit-webpage.smoke.js.map