nicechat
Version:
An extensible AI chat framework for OpenAI's models
95 lines (94 loc) • 4.02 kB
JavaScript
;
// Compiler-emitted helper that drives a generator function to completion as if
// it were an async function. An `__awaiter` already present on `this` is reused.
//   thisArg, _arguments - receiver and argument list applied to `generator`
//   P                   - promise constructor to use; falls back to the global Promise when falsy
//   generator           - generator function whose `yield`s stand in for `await`s
// Returns a P that resolves with the generator's return value; errors thrown
// while resuming the generator reject it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P unless it is already an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a settled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch gets a chance to handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve when the generator is done; otherwise wait on the yielded value and resume afterwards.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (rebinding `generator` to the iterator) and feed its first result into the loop.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Interop helper for default imports: a module flagged with `__esModule` is
// handed back untouched, anything else is wrapped so that it is reachable
// through a `default` property. An `__importDefault` already present on `this`
// takes precedence.
var __importDefault = (this && this.__importDefault) || function (imported) {
    if (imported && imported.__esModule) {
        return imported;
    }
    return { "default": imported };
};
Object.defineProperty(exports, "__esModule", { value: true });
const turndown_1 = __importDefault(require("turndown"));
/**
 * Tool definition that downloads a web page and returns its main content as
 * markdown.
 *
 * `meta` is the function schema advertised to the model; `url` is marked as
 * required so the model is told it must always supply it.
 *
 * `execute(args, { toolkit })`:
 *   - `args` is a JSON string expected to decode to an object with a string `url`.
 *   - `toolkit.debug` is called with progress messages.
 *   - Resolves with the markdown produced by `htmlToMd`.
 *   - Rejects with a SyntaxError when `args` is not valid JSON, with a TypeError
 *     when `url` is missing or not a non-empty string, and with whatever
 *     `fetch` rejects with on network failure.
 */
const FetchWebsite = {
    meta: {
        name: "fetch_website",
        description: "Fetch website's main content from the internet as markdown",
        parameters: {
            type: "object",
            properties: {
                url: {
                    type: "string",
                    description: "The url of the website to fetch",
                },
            },
            required: ["url"],
        },
    },
    execute: (args_1, _a) => __awaiter(void 0, [args_1, _a], void 0, function* (args, { toolkit }) {
        const parsed = JSON.parse(args);
        const url = parsed !== null && typeof parsed === "object" ? parsed["url"] : undefined;
        // Fail early with a readable message instead of letting fetch() choke on
        // `undefined` or a non-string value.
        if (typeof url !== "string" || url.trim() === "") {
            throw new TypeError('fetch_website: "url" must be a non-empty string');
        }
        toolkit.debug(`Fetching: ${url}`);
        const response = yield fetch(url);
        const html = yield response.text();
        // Wrap the logger so it keeps its receiver in case `debug` relies on `this`.
        const res = htmlToMd(html, (message) => toolkit.debug(message));
        toolkit.debug(`Result: ${res}`);
        return res;
    }),
};
exports.default = FetchWebsite;
/**
 * Picks the most content-like region of an HTML document and converts it to
 * markdown.
 *
 * Strategies, in order:
 *   1. exactly one `<article` in the document -> use that article;
 *   2. a `<main` element exists               -> use it;
 *   3. otherwise                              -> use `<body`, or the whole input
 *      when there is no body tag at all (plain-text responses, fragments).
 *
 * @param {string} html - raw HTML text.
 * @param {(message: string) => void} d - debug logger, called with diagnostics.
 * @returns {string} result of `processHtml` applied to the selected region
 *   after `removeStyleAndScript` has cleaned it.
 */
function htmlToMd(html, d) {
    const hasMainTag = html.includes("<main");
    d(`hasMainTag: ${hasMainTag}`);
    const hasArticleTag = html.includes("<article");
    d(`hasArticleTag: ${hasArticleTag}`);
    const hasMultipleArticleTags = html.split("<article").length > 2;
    // Strategy 1: a single article is almost certainly the page's content.
    if (hasArticleTag && !hasMultipleArticleTags) {
        return processHtml(removeStyleAndScript(extractElement(html, "article")));
    }
    // Strategy 2: fall back to the main landmark.
    if (hasMainTag) {
        return processHtml(removeStyleAndScript(extractElement(html, "main")));
    }
    // Strategy 3: the body, or the raw input when no body tag exists.
    const body = extractElement(html, "body");
    const bodyContent = body === null ? html : body;
    d(`bodyContent: ${bodyContent}`);
    return processHtml(removeStyleAndScript(bodyContent));
}
/**
 * Returns the text from the first `<tagName` up to the first `</tagName>` that
 * follows it, with the closing tag appended. When no closing tag is found the
 * slice runs to the end of the input. Returns null when the opening marker is
 * absent.
 */
function extractElement(html, tagName) {
    const openMarker = `<${tagName}`;
    const closeMarker = `</${tagName}>`;
    const start = html.indexOf(openMarker);
    if (start === -1) {
        return null;
    }
    const closeAt = html.indexOf(closeMarker, start);
    const fragment = closeAt === -1 ? html.slice(start) : html.slice(start, closeAt);
    return fragment + closeMarker;
}
/**
 * Strips `<style>` and `<script>` elements, including their contents, from an
 * HTML string.
 *
 * The opening tag name must be followed by whitespace, `/` or `>` so that
 * look-alike elements such as `<styled-box>` are left alone, and end tags with
 * trailing whitespace or junk (`</script >`) are recognised. Elements that are
 * never closed are left untouched.
 *
 * @param {string} html - HTML text to clean.
 * @returns {string} the input without style and script elements.
 */
function removeStyleAndScript(html) {
    return html
        .replace(/<style(?=[\s/>])[\s\S]*?<\/style\b[^>]*>/gi, "")
        .replace(/<script(?=[\s/>])[\s\S]*?<\/script\b[^>]*>/gi, "");
}
/**
 * Reduces every opening tag to its name plus any `href` attribute, then
 * converts the simplified HTML to markdown with Turndown (ATX headings).
 *
 * Closing tags and text are passed through unchanged. Attributes may be
 * separated by any whitespace (spaces, tabs, newlines) and `href` is matched
 * case-insensitively.
 *
 * @param {string} html - HTML fragment to simplify and convert.
 * @returns {string} markdown produced by Turndown.
 */
function processHtml(html) {
    // Splitting on a capturing group leaves text at even indices and the
    // captured tags at odd indices, so text that merely starts with "<" is
    // never mistaken for a tag.
    const simplified = html
        .split(/(<[^>]+>)/)
        .map((part, index) => {
            const isTag = index % 2 === 1;
            if (!isTag || part.startsWith("</")) {
                return part;
            }
            const kept = part
                .split(/\s+/)
                .filter((token) => token.startsWith("<") ||
                    token.startsWith(">") ||
                    token.toLowerCase().startsWith("href"))
                .join(" ");
            // Dropping the last attribute also drops the closing bracket; restore it.
            return kept.endsWith(">") ? kept : `${kept}>`;
        })
        .join("");
    const turndownService = new turndown_1.default({ headingStyle: "atx" });
    return turndownService.turndown(simplified);
}