UNPKG

@plastichub/osr-ai-tools

Version:

CLI and library for LLM tools

146 lines (145 loc) 7.47 kB
"use strict";
// --- TypeScript-emitted CommonJS interop helpers (generated code, logic left untouched) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.tools = void 0;
const path = __importStar(require("path"));
const write_1 = require("@plastichub/fs/write");
const puppeteer_1 = __importDefault(require("puppeteer"));
const turndown_1 = __importDefault(require("turndown"));
const turndown = new turndown_1.default();
const __1 = require("../..");

/**
 * Collects content, links, images and the og:image URL from the current document.
 *
 * This function is handed to `page.evaluate`, so it executes inside the browser
 * page and must stay self-contained: it may only use its argument and DOM globals,
 * never variables from this module.
 *
 * @param {string} [selector] - Optional CSS selector; when given, only the first
 *   matching element's innerHTML is returned as content (empty string if no match).
 *   Without it, the innerHTML of document.body is used.
 * @returns {{content: string, links: Array, images: Array, ogImage: (string|null|undefined)}}
 */
const extractPageData = (selector) => {
    // Read og:image BEFORE pruning: the cleanup below removes every <meta> element,
    // so looking it up afterwards could never find it.
    const mainImage = document.querySelector('meta[property="og:image"]')?.getAttribute('content') ||
        document.querySelector('meta[name="og:image"]')?.getAttribute('content');

    // Strip non-content and hidden elements so they do not end up in the markdown.
    const elementsToRemove = document.querySelectorAll('script, style, link, meta, noscript, iframe, [style*="display:none"],[style*="display: none"], .hidden');
    elementsToRemove.forEach(el => el.remove());

    // Extract all links (limited to 20)
    const links = Array.from(document.querySelectorAll('a'))
        .map(a => ({
            text: a.textContent?.trim() || '',
            href: a.href
        }))
        .filter(link => link.href && link.href.startsWith('http'))
        .slice(0, 20);

    // Extract all images (limited to 20)
    const images = Array.from(document.querySelectorAll('img'))
        .map(img => ({
            src: img.src,
            alt: img.alt || '',
            width: img.width,
            height: img.height
        }))
        .filter(img => img.src && img.src.startsWith('http'))
        .slice(0, 20);

    let content;
    if (selector) {
        const element = document.querySelector(selector);
        content = element ? element.innerHTML : '';
    } else {
        const body = document.body;
        content = body ? body.innerHTML : '';
    }
    return { content, links, images, ogImage: mainImage };
};

/**
 * Builds the list of web-browsing tool definitions.
 *
 * @param {string} target - Base directory; the last page's markdown, HTML and JSON
 *   result are written to `<target>/tmp/`.
 * @param {object} options - Passed through to `toolLogger` together with this
 *   file's base name.
 * @returns {Array<object>} A single-element array holding the `browse_page` tool.
 */
const tools = (target, options) => {
    const logger = (0, __1.toolLogger)(path.parse(__filename).name, options);
    return [
        {
            type: 'function',
            function: {
                name: 'browse_page',
                description: 'Browse a webpage and return its content as markdown, all links, images and pages main image',
                parameters: {
                    type: 'object',
                    properties: {
                        url: {
                            type: 'string',
                            description: 'URL of the webpage to browse'
                        },
                        // The handler already honoured these two inputs; declaring them
                        // makes them visible to callers. Both remain optional.
                        selector: {
                            type: 'string',
                            description: 'Optional CSS selector; only the first matching element is returned as content'
                        },
                        wait_for: {
                            type: 'string',
                            description: 'Optional CSS selector to wait for before extracting the page content'
                        }
                    },
                    required: ['url']
                },
                /**
                 * Loads `params.url` in headless Chromium and returns the page as markdown
                 * plus its links, images and og:image.
                 *
                 * Never throws: any failure is logged and reported as
                 * `{ success: false, error, url }`.
                 */
                function: async (params) => {
                    let browser;
                    try {
                        logger.debug(`Tool::BrowsePage Browsing ${params.url}`);
                        browser = await puppeteer_1.default.launch({
                            headless: true,
                            args: ['--no-sandbox', '--disable-setuid-sandbox']
                        });
                        const page = await browser.newPage();
                        logger.debug(`Tool::Web::BrowsePage Opening page ${params.url}`);
                        await page.goto(params.url, {
                            waitUntil: 'networkidle2'
                        });
                        if (params.wait_for) {
                            await page.waitForSelector(params.wait_for);
                        }
                        const pageData = await page.evaluate(extractPageData, params.selector);
                        const markdown = turndown.turndown(pageData.content);
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer_last.md'), markdown);
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer_last.html'), pageData.content);
                        const ret = {
                            success: true,
                            markdown: markdown,
                            links: pageData.links,
                            images: pageData.images,
                            mainImage: pageData.ogImage,
                            url: params.url
                        };
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer.json'), ret);
                        return ret;
                    } catch (error) {
                        logger.debug('Error browsing page:', error.message, error);
                        return {
                            success: false,
                            error: error.message,
                            url: params.url
                        };
                    } finally {
                        // Single cleanup point: the browser is closed exactly once on every
                        // path, and a failing close cannot replace the result or the
                        // original error.
                        if (browser) {
                            try {
                                await browser.close();
                            } catch (closeError) {
                                logger.debug('Error closing browser:', closeError.message);
                            }
                        }
                    }
                },
                parse: JSON.parse
            }
        }
    ];
};
exports.tools = tools;