donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
459 lines • 13.9 kB
JavaScript
;
// CommonJS export boilerplate: set the `__esModule` interop flag and expose
// `captureDomSnapshot` (a hoisted function declaration further down).
Object.defineProperty(exports, "__esModule", { value: true });
exports.captureDomSnapshot = captureDomSnapshot;
/**
 * Default character budget for the serialized snapshot, used when
 * `captureDomSnapshot` is called without an explicit `maxChars`.
 *
 * The budget is a target, not a hard cap: viewport content is always kept
 * (even if it alone exceeds the budget) and off-screen content is admitted
 * based on an approximate per-node size estimate.
 */
const DEFAULT_MAX_CHARS = 80_000;
/**
 * Produce a pruned DOM snapshot of a page, with child-frame content nested
 * under the matching `<iframe>` nodes, ready to hand to an LLM next to a
 * viewport screenshot.
 *
 * Steps: capture the main frame, capture every child frame that is not
 * `about:blank`, splice the child trees into the main tree, then serialize
 * with viewport content taking priority over off-screen content.
 *
 * @param page      Page object exposing `mainFrame()` and `frames()`.
 * @param maxChars  Character budget forwarded to `serializeWithBudget`.
 * @returns Whatever `serializeWithBudget` returns (`{ html, omittedCount }`).
 */
async function captureDomSnapshot(page, maxChars = DEFAULT_MAX_CHARS) {
  const mainTree = await captureFrameTree(page.mainFrame(), 'main');

  // Child frames worth capturing: anything except the main frame itself and
  // empty placeholder frames.
  const isInlineCandidate = (frame) =>
    frame !== page.mainFrame() && frame.url() !== 'about:blank';

  const treesByFrameKey = new Map();
  for (const frame of page.frames().filter(isInlineCandidate)) {
    try {
      const label = frame.name() || frame.url();
      const subtree = await captureFrameTree(frame, label);
      const identifier = await getFrameIdentifier(frame);
      if (!identifier) {
        // No attribute to match against an <iframe> node; nothing to inline.
        continue;
      }
      treesByFrameKey.set(identifier, subtree);
    } catch {
      // Frame may have been detached or navigated away — skip it.
    }
  }

  // Attach captured frame content beneath the corresponding <iframe> nodes.
  inlineFrames(mainTree, treesByFrameKey);

  // Emit text, preferring viewport content when the budget is tight.
  return serializeWithBudget(mainTree, maxChars);
}
/**
 * Derive the key used to match a child frame to its `<iframe>` node in the
 * parent tree: the first non-empty of the owner element's `name`, `id` or
 * `src` attributes (the same priority `inlineFrames` applies to the node).
 *
 * Never rejects: any failure while resolving or reading the owner element
 * (e.g. the frame was detached) yields `null`.
 *
 * @param frame  Frame object exposing `frameElement()`.
 * @returns {Promise<string|null>} The identifying attribute value, or `null`.
 */
async function getFrameIdentifier(frame) {
  let element;
  try {
    element = await frame.frameElement();
    // `return await` (not a bare `return`) so a rejection from `evaluate` is
    // handled by the catch below instead of escaping to the caller.
    // `||` is intentional: an empty attribute is not a usable key.
    return await element.evaluate(
      (el) =>
        el.getAttribute('name') ||
        el.getAttribute('id') ||
        el.getAttribute('src') ||
        null,
    );
  } catch {
    return null;
  } finally {
    if (element) {
      try {
        // Release the handle so it does not keep the element referenced.
        await element.dispose();
      } catch {
        // Best-effort cleanup: the frame may already be gone.
      }
    }
  }
}
/**
 * Allow-list of attribute names copied into the snapshot; every other
 * attribute on an element is dropped. Grouped by purpose for readability.
 */
const KEEP_ATTRS = new Set([
  // ARIA role, naming and state
  'role', 'aria-label', 'aria-labelledby', 'aria-describedby',
  'aria-placeholder', 'aria-roledescription',
  'aria-expanded', 'aria-checked', 'aria-selected', 'aria-disabled',
  'aria-haspopup', 'aria-current', 'aria-required', 'aria-invalid',
  // Test hooks
  'data-testid', 'data-test-id', 'data-cy', 'data-test',
  // Identity and form/link basics
  'name', 'id', 'type', 'placeholder', 'href', 'value', 'alt', 'title',
  'for', 'action', 'method', 'src', 'target', 'rel',
  // Boolean / state attributes
  'disabled', 'readonly', 'checked', 'selected', 'open', 'hidden',
  // Interaction hints
  'contenteditable', 'draggable', 'tabindex',
]);
/**
 * Tag names that are never collapsed or dropped by the tree walker, even when
 * the element carries no kept attributes and no content.
 */
const SEMANTIC_TAGS = new Set([
  // Document skeleton
  'html', 'head', 'body',
  // Landmarks and sectioning
  'header', 'nav', 'main', 'aside', 'footer', 'section', 'article',
  // Headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // Text blocks
  'p', 'blockquote', 'pre', 'code',
  // Lists
  'ul', 'ol', 'li', 'dl', 'dt', 'dd',
  // Tables
  'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
  'caption', 'colgroup', 'col',
  // Forms and controls
  'form', 'fieldset', 'legend', 'label', 'input', 'textarea',
  'select', 'option', 'optgroup', 'button', 'output', 'datalist',
  // Links
  'a',
  // Media
  'img', 'picture', 'figure', 'figcaption', 'video', 'audio', 'source', 'track',
  // Embedded documents
  'iframe',
  // Interactive widgets
  'details', 'summary', 'dialog', 'menu',
]);
/**
 * Tag names whose elements (and therefore whole subtrees) are left out of the
 * snapshot. Names are lowercase because the walker lowercases `tagName`
 * before the lookup.
 */
const SKIP_TAGS = new Set([
  // Non-visual document plumbing
  'script', 'style', 'link', 'meta', 'noscript',
  // Vector graphics and their primitives
  'svg', 'path', 'circle', 'rect', 'line', 'polyline', 'polygon', 'ellipse',
  'g', 'defs', 'use', 'symbol', 'clippath',
  // Inert markup
  'template',
]);
/**
 * Maximum number of characters of an element's own (direct) text kept in the
 * snapshot; longer text is cut at this length and suffixed with '…'.
 */
const MAX_TEXT_LENGTH = 80;
/**
 * Build the pruned node tree for one frame by running a walker inside the
 * page via `frame.evaluate`.
 *
 * The page function receives everything it needs through its single argument
 * (the module-level Sets are spread into arrays first), so it must not close
 * over anything defined in this module.
 *
 * Each resulting node is `{ tag, attrs, text?, children?, inViewport? }`.
 *
 * @param frame  Frame object exposing `evaluate(fn, arg)`.
 * @param label  Used as the `tag` of the placeholder node returned when the
 *               document root itself is filtered out.
 * @returns Promise of the root node of the captured tree.
 */
async function captureFrameTree(frame, label) {
  const walkerOptions = {
    keepAttrs: [...KEEP_ATTRS],
    semanticTags: [...SEMANTIC_TAGS],
    skipTags: [...SKIP_TAGS],
    maxTextLen: MAX_TEXT_LENGTH,
    frameLabel: label,
  };
  return frame.evaluate((options) => {
    const { keepAttrs, semanticTags, skipTags, maxTextLen, frameLabel } = options;
    const allowedAttrs = new Set(keepAttrs);
    const semanticNames = new Set(semanticTags);
    const ignoredNames = new Set(skipTags);
    const viewportHeight = window.innerHeight;
    const viewportWidth = window.innerWidth;

    // True when the element's box overlaps the viewport; boxes with no
    // width and no height never count.
    const intersectsViewport = (el) => {
      const box = el.getBoundingClientRect();
      const isCollapsed = box.width === 0 && box.height === 0;
      if (isCollapsed) {
        return false;
      }
      return (
        box.bottom > 0 &&
        box.top < viewportHeight &&
        box.right > 0 &&
        box.left < viewportWidth
      );
    };

    // Copy allow-listed attributes, shortening very long values (e.g. hrefs).
    const pickAttributes = (el) => {
      const picked = {};
      for (const { name, value } of Array.from(el.attributes)) {
        if (!allowedAttrs.has(name)) {
          continue;
        }
        picked[name] = value.length > 200 ? value.slice(0, 200) + '…' : value;
      }
      return picked;
    };

    // Text that belongs to the element itself (text-node children only),
    // trimmed, space-joined and capped at maxTextLen.
    const readDirectText = (el) => {
      const pieces = Array.from(el.childNodes)
        .filter((child) => child.nodeType === Node.TEXT_NODE)
        .map((child) => (child.textContent ?? '').trim())
        .filter((piece) => piece !== '');
      if (pieces.length === 0) {
        return undefined;
      }
      const joined = pieces.join(' ');
      return joined.length > maxTextLen
        ? joined.slice(0, maxTextLen) + '…'
        : joined;
    };

    const visit = (el, depth) => {
      // Depth guard: anything nested deeper than 25 levels is dropped.
      if (depth > 25) {
        return null;
      }
      const tag = el.tagName.toLowerCase();
      if (ignoredNames.has(tag)) {
        return null;
      }
      const attrs = pickAttributes(el);
      const inViewport = intersectsViewport(el);
      const children = Array.from(el.children)
        .map((childEl) => visit(childEl, depth + 1))
        .filter((childNode) => Boolean(childNode));
      const text = readDirectText(el);

      const isPlainWrapper =
        !semanticNames.has(tag) && Object.keys(attrs).length === 0;
      if (isPlainWrapper) {
        // A wrapper holding exactly one child and no text adds nothing:
        // hand the child to the parent instead.
        if (children.length === 1 && !text) {
          return children[0];
        }
        // A wrapper with neither text nor children is dropped entirely.
        if (!text && children.length === 0) {
          return null;
        }
      }

      return {
        tag,
        attrs,
        ...(text ? { text } : {}),
        ...(children.length > 0 ? { children } : {}),
        ...(inViewport ? { inViewport: true } : {}),
      };
    };

    const rootNode = visit(document.documentElement, 0);
    return rootNode ?? { tag: frameLabel, attrs: {} };
  }, walkerOptions);
}
/**
 * Find the `frameTrees` key that corresponds to an `<iframe>` node's
 * identifying attribute value.
 *
 * The tree walker shortens long attribute values and appends '…', while frame
 * keys are built from the full attribute value. When there is no exact match
 * and the value carries that marker, fall back to the first key that extends
 * the shortened prefix.
 *
 * @returns The matching key, or `undefined` when no frame matches.
 */
function resolveFrameKey(attrValue, frameTrees) {
  if (frameTrees.has(attrValue)) {
    return attrValue;
  }
  if (!attrValue.endsWith('…')) {
    return undefined;
  }
  const prefix = attrValue.slice(0, -1);
  if (prefix === '') {
    return undefined;
  }
  for (const candidate of frameTrees.keys()) {
    if (candidate.length > prefix.length && candidate.startsWith(prefix)) {
      return candidate;
    }
  }
  return undefined;
}
/**
 * Attach captured frame trees beneath their `<iframe>` nodes, in place.
 *
 * An `<iframe>` node is matched by the first non-empty of its `name`, `id`
 * or `src` attributes. Each frame tree is consumed (removed from the map)
 * when it is attached, and the attached tree is then searched as well so
 * that iframes nested inside child frames are inlined too.
 *
 * @param node        Tree node whose descendants are searched; mutated.
 * @param frameTrees  Map of frame key -> captured frame tree; mutated.
 */
function inlineFrames(node, frameTrees) {
  if (!node.children) {
    return;
  }
  for (const child of node.children) {
    if (child.tag !== 'iframe') {
      inlineFrames(child, frameTrees);
      continue;
    }
    const attrValue = child.attrs['name'] || child.attrs['id'] || child.attrs['src'];
    const key = attrValue ? resolveFrameKey(attrValue, frameTrees) : undefined;
    if (key === undefined) {
      continue;
    }
    const frameTree = frameTrees.get(key);
    // Consume the entry before recursing so a tree is attached at most once.
    frameTrees.delete(key);
    child.children = [frameTree];
    inlineFrames(frameTree, frameTrees);
  }
}
/**
 * Serialize the tree to indented HTML-like text, giving viewport content
 * priority when the full output does not fit the character budget.
 *
 * If the complete tree fits within `maxChars` it is returned as is. Otherwise
 * the size of the viewport-only rendering is measured, the leftover budget is
 * handed to `truncateOffViewportNodes` (which removes off-viewport subtrees
 * from `root` in place and records per-parent `truncated` counts), and the
 * pruned tree is serialized.
 *
 * @param root      Root node: `{ tag, attrs, text?, children?, inViewport?, truncated? }`.
 * @param maxChars  Character budget for the output.
 * @returns {{ html: string, omittedCount: number }} The serialized text and
 *          the number of nodes actually removed from the tree (0 when
 *          nothing had to be dropped).
 */
function serializeWithBudget(root, maxChars) {
  // Elements written in self-closing form; their text/children are ignored.
  const voidTags = new Set(['input', 'img', 'br', 'hr', 'col', 'source', 'track']);
  function serializeNode(node, indent, viewportOnly) {
    if (viewportOnly && !nodeHasViewportContent(node)) {
      return '';
    }
    const pad = ' '.repeat(indent);
    // Empty attribute values are written as bare (boolean-style) attributes.
    const attrStr = Object.entries(node.attrs)
      .map(([k, v]) => (v === '' ? k : `${k}="${escapeAttr(v)}"`))
      .join(' ');
    const openTag = attrStr ? `<${node.tag} ${attrStr}>` : `<${node.tag}>`;
    if (voidTags.has(node.tag)) {
      return `${pad}${attrStr ? `<${node.tag} ${attrStr} />` : `<${node.tag} />`}\n`;
    }
    let result = `${pad}${openTag}`;
    const hasChildren = node.children && node.children.length > 0;
    const hasText = node.text;
    if (!hasChildren && !hasText) {
      if (node.truncated) {
        // Every child was removed by truncation; leave a marker in its place.
        result += `\n${pad} <!-- ${node.truncated} child elements omitted -->\n${pad}</${node.tag}>\n`;
      }
      else {
        result += `</${node.tag}>\n`;
      }
    }
    else if (!hasChildren && hasText) {
      // Text-only element: keep it on a single line.
      result += `${escapeText(node.text)}</${node.tag}>\n`;
    }
    else {
      result += '\n';
      if (hasText) {
        result += `${pad} ${escapeText(node.text)}\n`;
      }
      if (hasChildren) {
        for (const child of node.children) {
          result += serializeNode(child, indent + 1, viewportOnly);
        }
      }
      if (node.truncated) {
        result += `${pad} <!-- ${node.truncated} elements below viewport omitted -->\n`;
      }
      result += `${pad}</${node.tag}>\n`;
    }
    return result;
  }
  // Fast path: try the full tree first — most pages fit within budget.
  const fullHtml = serializeNode(root, 0, false);
  if (fullHtml.length <= maxChars) {
    return { html: fullHtml, omittedCount: 0 };
  }
  // Full tree exceeds budget: measure the viewport-only rendering, then let
  // off-viewport content fill whatever space is left.
  const viewportHtml = serializeNode(root, 0, true);
  const offViewportBefore = countOffViewportNodes(root);
  truncateOffViewportNodes(root, maxChars - viewportHtml.length);
  // Truncation only removes subtrees made entirely of off-viewport nodes, so
  // the drop in the off-viewport count is exactly the number of nodes omitted.
  const omittedCount = offViewportBefore - countOffViewportNodes(root);
  return { html: serializeNode(root, 0, false), omittedCount };
}
/**
 * Report whether the node itself, or any node beneath it, is flagged
 * `inViewport`.
 *
 * @param node  Tree node with optional `inViewport` and `children`.
 * @returns {boolean}
 */
function nodeHasViewportContent(node) {
  // Explicit work list instead of recursion; visiting order is irrelevant
  // because the answer is a plain "does any node match".
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.inViewport) {
      return true;
    }
    if (current.children) {
      for (const child of current.children) {
        pending.push(child);
      }
    }
  }
  return false;
}
/**
 * Count the nodes in this subtree (the node itself included) that are not
 * flagged `inViewport`.
 *
 * @param node  Tree node with optional `inViewport` and `children`.
 * @returns {number}
 */
function countOffViewportNodes(node) {
  const ownContribution = node.inViewport ? 0 : 1;
  if (!node.children) {
    return ownContribution;
  }
  return node.children.reduce(
    (total, child) => total + countOffViewportNodes(child),
    ownContribution,
  );
}
/**
 * Drop off-viewport children that do not fit in the remaining character
 * budget, recording how many were dropped in `node.truncated`.
 *
 * Budget is spent in priority order: children containing viewport content are
 * always kept and processed first (their own off-viewport descendants may
 * consume budget), then purely off-viewport children are admitted one by one
 * while their estimated size still fits. The surviving children keep their
 * original document order.
 *
 * @param node       Tree node; its `children` (and `truncated`) are mutated.
 * @param remaining  Characters still available for off-viewport content; may
 *                   be zero or negative, in which case nothing is admitted.
 * @returns {number} Budget left after processing this subtree.
 */
function truncateOffViewportNodes(node, remaining) {
  if (!node.children) {
    return remaining;
  }
  let budget = remaining;
  const keep = new Set();
  const offViewportChildren = [];
  // Pass 1: viewport-bearing children are always kept and get first call on
  // the budget for their off-viewport descendants.
  for (const child of node.children) {
    if (nodeHasViewportContent(child)) {
      budget = truncateOffViewportNodes(child, budget);
      keep.add(child);
    }
    else {
      offViewportChildren.push(child);
    }
  }
  // Pass 2: admit off-viewport children while the estimate still fits.
  let truncatedCount = 0;
  for (const child of offViewportChildren) {
    const estimate = estimateNodeSize(child);
    if (budget - estimate > 0) {
      budget -= estimate;
      keep.add(child);
    }
    else {
      truncatedCount++;
    }
  }
  // Filter rather than rebuild so siblings stay in document order.
  node.children = node.children.filter((child) => keep.has(child));
  if (truncatedCount > 0) {
    node.truncated = truncatedCount;
  }
  return budget;
}
function estimateNodeSize(node) {
let size = node.tag.length * 2 + 5; // open+close tags
for (const [k, v] of Object.entries(node.attrs)) {
size += k.length + v.length + 4;
}
if (node.text) {
size += node.text.length;
}
if (node.children) {
for (const child of node.children) {
size += estimateNodeSize(child);
}
}
return size;
}
/**
 * Escape a string for use inside a double-quoted attribute value.
 * `&` is replaced first so the entities produced for `"` are not re-escaped.
 *
 * @param {string} s  Raw attribute value.
 * @returns {string}  Value with `&` and `"` replaced by character references.
 */
function escapeAttr(s) {
  return s.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
}
/**
 * Escape a string for use as element text content.
 * `&` is replaced first so the entities produced for `<` and `>` are not
 * re-escaped.
 *
 * @param {string} s  Raw text.
 * @returns {string}  Text with `&`, `<` and `>` replaced by character references.
 */
function escapeText(s) {
  return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
//# sourceMappingURL=domSnapshot.js.map