/*
 * @plastichub/osr-ai-tools
 * Version: (not recorded)
 * CLI and library for LLM tools
 * 146 lines (145 loc) • 7.47 kB
 * JavaScript
 */
/**
 * Interop helper: exposes property `k` of module object `m` on `o` under the
 * name `k2` (falls back to `k` when `k2` is undefined).
 *
 * An already-installed helper on `this` is reused when present. When
 * `Object.create` is available the property is installed via a descriptor;
 * otherwise the value is copied with a plain assignment.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        const exportName = k2 === undefined ? k : k2;
        let descriptor = Object.getOwnPropertyDescriptor(m, k);

        // Decide whether the source descriptor can be reused as-is or must be
        // replaced by a getter that reads `m[k]` on every access.
        let needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor property: keep it only for modules flagged __esModule.
            needsLiveGetter = !m.__esModule;
        }
        else {
            // Data property: it can still change if writable or configurable.
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }

        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: () => m[k] };
        }
        Object.defineProperty(o, exportName, descriptor);
    }
    : function (o, m, k, k2) {
        const exportName = k2 === undefined ? k : k2;
        o[exportName] = m[k];
    });
/**
 * Interop helper: installs `v` as the `default` member of namespace object `o`.
 *
 * An already-installed helper on `this` is reused when present. With
 * `Object.create` available the member is defined through a descriptor that
 * only sets `enumerable` and `value`; otherwise it is a plain assignment.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (o, v) {
        const defaultDescriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", defaultDescriptor);
    }
    : function (o, v) {
        o.default = v;
    });
/**
 * Interop helper behind `import * as x from "..."`.
 *
 * Modules flagged `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own enumerable key except
 * "default" is bound via `__createBinding`, and the module itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }

    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            if (key === "default") {
                continue;
            }
            // for...in also walks inherited keys; only own ones are re-exported.
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * Interop helper behind `import x from "..."`.
 *
 * Modules flagged `__esModule` are returned as-is; any other value is wrapped
 * so that it is reachable through a `default` property.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { default: mod };
};
// Flag this CommonJS module with `__esModule` (the interop helpers above check this flag).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `tools` export up front; the real value is assigned once `tools` is defined.
exports.tools = void 0;
// Node path utilities, wrapped as a namespace object.
const path = __importStar(require("path"));
// File writer; only its `sync` function is used in this module.
const write_1 = require("@plastichub/fs/write");
// Headless-browser driver used to load and inspect pages.
const puppeteer_1 = __importDefault(require("puppeteer"));
// HTML-to-markdown converter class.
const turndown_1 = __importDefault(require("turndown"));
// Single converter instance created at module load and shared by every tool invocation.
const turndown = new turndown_1.default();
// Package root; supplies `toolLogger`.
const __1 = require("../..");
/**
 * Builds the web-browsing tool definitions for this module.
 *
 * @param {string} target - Base directory; snapshots of the last browsed page
 *   (markdown, HTML and the JSON result) are written under `<target>/tmp`.
 * @param {object} options - Forwarded to `toolLogger` when creating the logger.
 * @returns {Array<object>} One-element array holding the `browse_page`
 *   function-tool definition.
 */
const tools = (target, options) => {
    const logger = (0, __1.toolLogger)(path.parse(__filename).name, options);

    /**
     * Collects content, links, images and the og:image of the current document.
     *
     * NOTE: this function is handed to `page.evaluate`, so it runs inside the
     * browser page. It must stay self-contained and must not reference any
     * variable from the enclosing Node scope.
     *
     * @param {string} [selector] - CSS selector of the element whose inner HTML
     *   is returned as content; the document body is used when omitted.
     * @returns {{content: string, links: Array, images: Array, ogImage: (string|null|undefined)}}
     */
    const extractPageData = (selector) => {
        const maxItems = 20;

        // Read og:image FIRST: the clean-up below removes every <meta> element,
        // so querying afterwards could never find it.
        const mainImage = document.querySelector('meta[property="og:image"]')?.getAttribute('content') ||
            document.querySelector('meta[name="og:image"]')?.getAttribute('content');

        // Strip non-content and hidden elements before extracting anything else.
        const elementsToRemove = document.querySelectorAll('script, style, link, meta, noscript, iframe, [style*="display:none"],[style*="display: none"], .hidden');
        elementsToRemove.forEach((el) => el.remove());

        // Absolute http(s) links only, capped at maxItems.
        const links = Array.from(document.querySelectorAll('a'))
            .map((a) => ({
                text: a.textContent?.trim() || '',
                href: a.href
            }))
            .filter((link) => link.href && link.href.startsWith('http'))
            .slice(0, maxItems);

        // Absolute http(s) images only, capped at maxItems.
        const images = Array.from(document.querySelectorAll('img'))
            .map((img) => ({
                src: img.src,
                alt: img.alt || '',
                width: img.width,
                height: img.height
            }))
            .filter((img) => img.src && img.src.startsWith('http'))
            .slice(0, maxItems);

        // A selector that matches nothing yields empty content rather than an error.
        const root = selector ? document.querySelector(selector) : document.body;
        const content = root ? root.innerHTML : '';

        return {
            content,
            links,
            images,
            ogImage: mainImage
        };
    };

    return [
        {
            type: 'function',
            function: {
                name: 'browse_page',
                description: 'Browse a webpage and return its content as markdown, all links, images and pages main image',
                parameters: {
                    type: 'object',
                    properties: {
                        url: {
                            type: 'string',
                            description: 'URL of the webpage to browse'
                        },
                        // Both options below were already honoured by the
                        // implementation; declaring them makes them discoverable.
                        selector: {
                            type: 'string',
                            description: 'Optional CSS selector; when given, only the inner HTML of the first matching element is converted to markdown (defaults to the whole page body)'
                        },
                        wait_for: {
                            type: 'string',
                            description: 'Optional CSS selector to wait for before extracting the page content'
                        }
                    },
                    required: ['url']
                },
                /**
                 * Loads `params.url` in headless Chromium and extracts its content.
                 *
                 * Never rejects: failures are reported as
                 * `{ success: false, error, url }`.
                 *
                 * @param {{url: string, selector?: string, wait_for?: string}} params
                 * @returns {Promise<object>} On success: `{ success: true, markdown,
                 *   links, images, mainImage, url }`.
                 */
                function: async (params) => {
                    let browser;
                    try {
                        logger.debug(`Tool::BrowsePage Browsing ${params.url}`);
                        browser = await puppeteer_1.default.launch({
                            headless: true,
                            args: ['--no-sandbox', '--disable-setuid-sandbox']
                        });
                        const page = await browser.newPage();
                        logger.debug(`Tool::Web::BrowsePage Opening page ${params.url}`);
                        await page.goto(params.url, {
                            waitUntil: 'networkidle2'
                        });
                        if (params.wait_for) {
                            await page.waitForSelector(params.wait_for);
                        }
                        const pageData = await page.evaluate(extractPageData, params.selector);
                        const markdown = turndown.turndown(pageData.content);

                        // Snapshots of the most recent page, kept for inspection.
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer_last.md'), markdown);
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer_last.html'), pageData.content);
                        const ret = {
                            success: true,
                            markdown: markdown,
                            links: pageData.links,
                            images: pageData.images,
                            mainImage: pageData.ogImage,
                            url: params.url
                        };
                        // NOTE(review): an object is passed here; presumably the
                        // writer serializes it as JSON - confirm against @plastichub/fs.
                        (0, write_1.sync)(path.join(target, 'tmp', 'web_puppeteer.json'), ret);
                        return ret;
                    }
                    catch (error) {
                        logger.debug('Error browsing page:', error.message, error);
                        return {
                            success: false,
                            error: error.message,
                            url: params.url
                        };
                    }
                    finally {
                        // Release the browser exactly once on every path. A failing
                        // close() is only logged so it cannot replace the real
                        // result or hide the original error.
                        if (browser) {
                            try {
                                await browser.close();
                            }
                            catch (closeError) {
                                logger.debug('Error closing browser:', closeError.message);
                            }
                        }
                    }
                },
                parse: JSON.parse
            }
        }
    ];
};
exports.tools = tools;
;