camoufox-mcp-server
Version:
MCP server for browser automation using Camoufox - a privacy-focused Firefox fork with advanced anti-detection features
253 lines (252 loc) • 9.06 kB
JavaScript
import { MAX_EXTRACT_NODES } from "../config.js";
import { redactUrl } from "../utils.js";
export async function extractPageContent(page, outputMode, maxChars, selector) {
if (outputMode === "metadata") {
return {
value: "",
truncated: false,
found: false,
};
}
return page.evaluate(({ mode, maxLength, cssSelector, maxNodes }) => {
const root = cssSelector
? document.querySelector(cssSelector)
: document.body ?? document.documentElement;
if (!root) {
return { value: "", truncated: false, found: false };
}
const limit = maxLength + 1;
const blockedTextTags = new Set(["SCRIPT", "STYLE", "TEMPLATE", "NOSCRIPT"]);
const blockBoundaryTags = new Set([
"ADDRESS",
"ARTICLE",
"ASIDE",
"BLOCKQUOTE",
"BR",
"DD",
"DETAILS",
"DIALOG",
"DIV",
"DL",
"DT",
"FIELDSET",
"FIGCAPTION",
"FIGURE",
"FOOTER",
"FORM",
"H1",
"H2",
"H3",
"H4",
"H5",
"H6",
"HEADER",
"HR",
"LI",
"MAIN",
"NAV",
"OL",
"P",
"PRE",
"SECTION",
"TABLE",
"TBODY",
"TD",
"TFOOT",
"TH",
"THEAD",
"TR",
"UL",
]);
function appendBounded(current, chunk) {
const available = limit - current.length;
if (available <= 0) {
return { value: current, truncated: chunk.length > 0 };
}
if (chunk.length > available) {
return { value: `${current}${chunk.slice(0, available)}`, truncated: true };
}
return { value: `${current}${chunk}`, truncated: false };
}
function isHiddenElement(element) {
if (blockedTextTags.has(element.tagName)) {
return true;
}
if (element instanceof HTMLElement && element.hidden) {
return true;
}
if (element.getAttribute("aria-hidden") === "true") {
return true;
}
const style = window.getComputedStyle(element);
return style.display === "none" || style.visibility === "hidden" || style.visibility === "collapse";
}
if (mode === "html") {
const voidTags = new Set([
"area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"link",
"meta",
"param",
"source",
"track",
"wbr",
]);
let html = "";
let truncated = false;
let visitedNodes = 0;
const stack = [{ node: root, closing: false }];
function escapeText(value) {
return value
.replaceAll("&", "&")
.replaceAll("<", "<")
.replaceAll(">", ">");
}
function escapeAttribute(value) {
return escapeText(value).replaceAll("\"", """);
}
function appendHtml(chunk) {
const result = appendBounded(html, chunk);
html = result.value;
truncated = truncated || result.truncated;
}
while (stack.length > 0 && html.length < limit && visitedNodes < maxNodes) {
const current = stack.pop();
if (!current) {
break;
}
const { node, closing } = current;
if (closing) {
appendHtml(`</${node.tagName.toLowerCase()}>`);
continue;
}
visitedNodes += 1;
if (node.nodeType === Node.ELEMENT_NODE) {
const element = node;
const tagName = element.tagName.toLowerCase();
appendHtml(`<${tagName}`);
for (const attribute of Array.from(element.attributes)) {
appendHtml(` ${attribute.name}="${escapeAttribute(attribute.value)}"`);
}
appendHtml(">");
if (voidTags.has(tagName)) {
continue;
}
stack.push({ node: element, closing: true });
for (let index = element.childNodes.length - 1; index >= 0; index -= 1) {
stack.push({ node: element.childNodes[index], closing: false });
}
}
else if (node.nodeType === Node.TEXT_NODE) {
appendHtml(escapeText(node.nodeValue ?? ""));
}
else if (node.nodeType === Node.COMMENT_NODE) {
appendHtml(`<!--${node.nodeValue ?? ""}-->`);
}
}
truncated = truncated || stack.length > 0 || visitedNodes >= maxNodes || html.length > maxLength;
return {
value: html.slice(0, maxLength),
truncated,
found: true,
};
}
let text = "";
let truncated = false;
let visitedNodes = 0;
const stack = [{ node: root, closing: false }];
function appendText(chunk) {
const normalized = chunk.replace(/\s+/g, " ").trim();
if (!normalized) {
return;
}
const needsSpace = text.length > 0 && !/\s$/.test(text) && !/^[,.;:!?)]/.test(normalized);
const result = appendBounded(text, `${needsSpace ? " " : ""}${normalized}`);
text = result.value;
truncated = truncated || result.truncated;
}
function appendBoundary() {
if (!text || /\n$/.test(text)) {
return;
}
const result = appendBounded(text.replace(/[ \t]+$/, ""), "\n");
text = result.value;
truncated = truncated || result.truncated;
}
while (stack.length > 0 && text.length < limit && visitedNodes < maxNodes) {
const current = stack.pop();
if (!current) {
break;
}
const { node, closing } = current;
if (closing) {
if (blockBoundaryTags.has(node.tagName)) {
appendBoundary();
}
continue;
}
visitedNodes += 1;
if (node.nodeType === Node.ELEMENT_NODE) {
const element = node;
if (isHiddenElement(element)) {
continue;
}
if (element.tagName === "BR") {
appendBoundary();
continue;
}
if (blockBoundaryTags.has(element.tagName)) {
appendBoundary();
}
stack.push({ node: element, closing: true });
for (let index = element.childNodes.length - 1; index >= 0; index -= 1) {
stack.push({ node: element.childNodes[index], closing: false });
}
}
else if (node.nodeType === Node.TEXT_NODE) {
appendText(node.nodeValue ?? "");
}
}
const normalizedText = text
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.trim();
truncated = truncated || stack.length > 0 || visitedNodes >= maxNodes || text.length > maxLength || normalizedText.length > maxLength;
return {
value: normalizedText.slice(0, maxLength),
truncated,
found: true,
};
}, { mode: outputMode, maxLength: maxChars, cssSelector: selector, maxNodes: MAX_EXTRACT_NODES });
}
export async function buildBrowsePayload(page, response, outputMode, maxChars, selector) {
const payload = {
url: redactUrl(page.url()),
title: await page.title(),
status: response?.status(),
contentType: response?.headers()["content-type"],
outputMode,
truncated: false,
maxChars,
selector,
};
if (outputMode === "metadata") {
return payload;
}
const extracted = await extractPageContent(page, outputMode, maxChars, selector);
payload.truncated = extracted.truncated;
payload.selectorFound = extracted.found;
if (outputMode === "html") {
payload.html = extracted.value;
}
else {
payload.text = extracted.value;
}
return payload;
}