UNPKG

nicechat

Version:

An extensible AI chat framework for OpenAI's models

95 lines (94 loc) 4.02 kB
"use strict";
// TypeScript-emitted helper: drives a generator function as a Promise chain
// (the down-levelled form of async/await).
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// TypeScript-emitted helper: wraps a non-ES module so that `.default` is usable.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const turndown_1 = __importDefault(require("turndown"));

/**
 * Tool definition: downloads a web page and returns its main content as
 * markdown.
 *
 * `meta` is the function-calling schema; `execute` receives the raw JSON
 * argument string and a context object carrying `toolkit` (only
 * `toolkit.debug` is used here).
 */
const FetchWebsite = {
    meta: {
        name: "fetch_website",
        description: "Fetch website's main content from the internet as markdown",
        parameters: {
            type: "object",
            properties: {
                url: {
                    type: "string",
                    description: "The url of the website to fetch",
                },
            },
        },
    },
    /**
     * @param {string} args - JSON-encoded arguments, expected shape `{ "url": string }`.
     * @param {{ toolkit: { debug: Function } }} context - Execution context.
     * @returns {Promise<string>} Markdown rendering of the page's main content.
     * @throws {SyntaxError} If `args` is not valid JSON.
     * @throws {TypeError} If `url` is missing/not a string, or the fetch fails.
     */
    execute: (args_1, _a) => __awaiter(void 0, [args_1, _a], void 0, function* (args, { toolkit }) {
        const url = JSON.parse(args)["url"];
        // The schema does not mark `url` as required, so fail early with a
        // readable message instead of an opaque URL-parse error from fetch.
        if (typeof url !== "string" || url === "") {
            throw new TypeError('fetch_website: missing or invalid "url" argument');
        }
        // Wrap rather than pass `toolkit.debug` directly so the method keeps
        // its `this` binding when invoked from htmlToMd.
        const debug = (message) => toolkit.debug(message);
        debug(`Fetching: ${url}`);
        const html = yield fetch(url).then((x) => x.text());
        const res = htmlToMd(html, debug);
        debug(`Result: ${res}`);
        return res;
    }),
};
exports.default = FetchWebsite;

/**
 * Returns the first `<tagName ...>...</tagName>` element of `html` as a
 * string, or `null` when no opening tag is present.
 *
 * Matching is a plain, case-sensitive substring search. If the closing tag is
 * missing, everything from the opening tag to the end of the input is used and
 * a closing tag is appended.
 *
 * @param {string} html - Source markup.
 * @param {string} tagName - Lower-case tag name, e.g. "article".
 * @returns {string|null} The extracted element markup, or null if absent.
 */
function extractElement(html, tagName) {
    const openToken = "<" + tagName;
    const closeToken = "</" + tagName + ">";
    const start = html.indexOf(openToken);
    if (start === -1) {
        return null;
    }
    const end = html.indexOf(closeToken, start);
    const fragment = end === -1 ? html.slice(start) : html.slice(start, end);
    return fragment + closeToken;
}

/**
 * Picks the most content-like region of an HTML document and converts it to
 * markdown.
 *
 * Strategy order: a single `<article>`, then `<main>`, then `<body>`, and
 * finally the whole input when none of those tags exist.
 *
 * @param {string} html - Raw HTML (or any text) returned by the server.
 * @param {(message: string) => void} d - Debug logger.
 * @returns {string} Markdown for the selected region.
 */
function htmlToMd(html, d) {
    const hasMainTag = html.includes("<main");
    d("hasMainTag: " + hasMainTag);
    const firstArticle = html.indexOf("<article");
    const hasArticleTag = firstArticle !== -1;
    d("hasArticleTag: " + hasArticleTag);
    const hasMultipleArticleTags = hasArticleTag &&
        html.indexOf("<article", firstArticle + "<article".length) !== -1;

    // Strategy 1: exactly one article tag -> that is the main content.
    // (Several articles usually indicate a listing page, so skip them.)
    if (hasArticleTag && !hasMultipleArticleTags) {
        return processHtml(removeStyleAndScript(extractElement(html, "article")));
    }

    // Strategy 2: use the main tag when present.
    if (hasMainTag) {
        return processHtml(removeStyleAndScript(extractElement(html, "main")));
    }

    // Strategy 3: use the body tag. Responses without a body tag (fragments,
    // plain text, JSON) are converted as-is instead of throwing.
    const extractedBody = extractElement(html, "body");
    const bodyContent = extractedBody === null ? html : extractedBody;
    d("bodyContent: " + bodyContent);
    return processHtml(removeStyleAndScript(bodyContent));
}

/**
 * Strips `<style>` and `<script>` elements, including their contents.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} Markup without style/script elements.
 */
function removeStyleAndScript(html) {
    // Remove style tags and their contents
    let cleanedHtml = html.replace(/<style[\s\S]*?<\/style>/gi, "");
    // Remove script tags and their contents
    cleanedHtml = cleanedHtml.replace(/<script[\s\S]*?<\/script>/gi, "");
    return cleanedHtml;
}

/**
 * Reduces an opening tag to its name plus its `href` attribute (if any).
 *
 * The href is extracted with an attribute-aware pattern so that quoted values
 * containing spaces stay intact; splitting on spaces would truncate them and
 * leave an unterminated quote in the output.
 *
 * @param {string} tag - A chunk starting with "<" that is not a closing tag.
 * @returns {string} The simplified tag, or the input unchanged if it has no tag name.
 */
function simplifyOpeningTag(tag) {
    const nameMatch = /^<[^\s/>]+/.exec(tag);
    if (!nameMatch) {
        // Not tag-shaped (e.g. a stray "<" in text): leave untouched.
        return tag;
    }
    const hrefMatch = /\s(href\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+))/i.exec(tag);
    return hrefMatch ? `${nameMatch[0]} ${hrefMatch[1]}>` : `${nameMatch[0]}>`;
}

/**
 * Drops every attribute except `href` from opening tags, then converts the
 * markup to markdown with Turndown (ATX-style headings).
 *
 * @param {string} html - Markup to convert.
 * @returns {string} Markdown output.
 */
function processHtml(html) {
    // The capture group keeps the tags themselves in the split result.
    const parts = html.split(/(<[^>]+>)/).filter((s) => !!s);
    const res = parts.map((s) => {
        if (s.startsWith("<") && !s.startsWith("</")) {
            return simplifyOpeningTag(s);
        }
        return s;
    });
    const turndownService = new turndown_1.default({ headingStyle: "atx" });
    return turndownService.turndown(res.join(""));
}