UNPKG

navflow-browser-server

Version:

Standalone Playwright browser server for NavFlow. It enables browser automation with API key authentication, workspace device management, session sync, and LLM discovery tools. Requires Node.js v22+.

658 lines 30.9 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.BrowserManager = void 0;
const playwright_1 = require("playwright");
const path_1 = __importDefault(require("path"));
const fs_1 = require("fs");
const promises_1 = __importDefault(require("fs/promises"));
const child_process_1 = require("child_process");
const node_html_markdown_1 = require("node-html-markdown");
const DataDirectory_1 = require("./DataDirectory");

/** How often idle sessions are swept (5 minutes). */
const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;

/** User agent applied to new contexts when the configuration does not supply one. */
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * Parses a config value (string or number) as a base-10 integer.
 *
 * @param {*} value - Value to parse; it is stringified first.
 * @returns {number} The parsed integer, or NaN when the value is not numeric.
 */
function toInt(value) {
    return Number.parseInt(String(value), 10);
}

/**
 * Returns a copy of a browser configuration that is safe to log.
 *
 * @param {object|null|undefined} config - Browser configuration, possibly with a `proxy.password`.
 * @returns {object|null|undefined} The same object when there is nothing to hide,
 *   otherwise a shallow copy whose proxy password is masked.
 */
function redactProxyPassword(config) {
    if (!config?.proxy?.password) {
        return config;
    }
    return { ...config, proxy: { ...config.proxy, password: '***' } };
}

/**
 * Reports whether a path can be accessed by the current process.
 *
 * @param {string} filePath - Path to probe.
 * @returns {boolean} True when `fs.accessSync` succeeds.
 */
function isAccessible(filePath) {
    try {
        fs_1.accessSync(filePath);
        return true;
    }
    catch {
        return false;
    }
}

/**
 * Picks the most likely location of a locally installed Google Chrome for the
 * current platform. The result is a candidate only; the caller verifies it.
 *
 * @returns {string} A candidate executable path, or '' when no Windows candidate is accessible.
 */
function locateChromeCandidate() {
    const platform = process.platform;
    if (platform === 'darwin') {
        return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
    }
    if (platform === 'win32') {
        const candidates = [
            'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
            'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
        ];
        // Only probe the per-user install when the variable is actually defined.
        if (process.env.LOCALAPPDATA) {
            candidates.push(`${process.env.LOCALAPPDATA}\\Google\\Chrome\\Application\\chrome.exe`);
        }
        return candidates.find((candidate) => isAccessible(candidate)) || '';
    }
    for (const binary of ['google-chrome', 'chromium-browser']) {
        try {
            return (0, child_process_1.execSync)(`which ${binary}`, { encoding: 'utf8' }).trim();
        }
        catch {
            // `which` exited non-zero: this binary is not on PATH, try the next one.
        }
    }
    return '/usr/bin/google-chrome';
}

/**
 * Builds the Chromium command-line arguments for a session.
 *
 * @param {object} finalConfig - Merged browser configuration.
 * @returns {string[]} Base arguments, plus the extended set unless `stealth === false`,
 *   plus any user-supplied `customArgs` (one per line, `#` lines skipped).
 */
function buildChromiumArgs(finalConfig) {
    // NOTE(review): several separate --disable-features switches are passed here and
    // below; confirm Chromium honours all of them rather than only the last one.
    let args = [
        '--disable-blink-features=AutomationControlled',
        '--disable-features=VizDisplayCompositor',
        '--disable-web-security',
        '--disable-features=site-per-process'
    ];
    if (finalConfig.stealth !== false) {
        args = args.concat([
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            '--disable-features=TranslateUI',
            '--disable-ipc-flooding-protection',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-extensions',
            '--disable-plugins',
            '--disable-component-extensions-with-background-pages'
        ]);
    }
    if (finalConfig.customArgs) {
        const customArgsList = finalConfig.customArgs.split('\n')
            .map((arg) => arg.trim())
            .filter((arg) => arg && !arg.startsWith('#'));
        args = args.concat(customArgsList);
    }
    return args;
}

/**
 * Builds the Playwright launch options for a session.
 *
 * @param {object} finalConfig - Merged browser configuration.
 * @param {string} browserType - Requested browser type.
 * @param {string|null} localChromePath - Detected local Chrome executable, if any.
 * @returns {object} Options to pass to `<browserType>.launch()`.
 */
function buildLaunchOptions(finalConfig, browserType, localChromePath) {
    const slowMo = finalConfig.slowMo ? toInt(finalConfig.slowMo) : 0;
    const launchOptions = {
        headless: finalConfig.headless ?? false,
        // A non-numeric slowMo falls back to 0 instead of passing NaN to Playwright.
        slowMo: Number.isNaN(slowMo) ? 0 : slowMo
    };
    const proxy = finalConfig.proxy;
    if (proxy?.enabled && proxy.host && proxy.port) {
        launchOptions.proxy = { server: `http://${proxy.host}:${proxy.port}` };
        // Credentials are only attached when both halves are present.
        if (proxy.username && proxy.password) {
            launchOptions.proxy.username = proxy.username;
            launchOptions.proxy.password = proxy.password;
        }
    }
    if (browserType === 'chromium' || browserType === 'chrome') {
        if (browserType === 'chrome' && localChromePath) {
            launchOptions.executablePath = localChromePath;
        }
        launchOptions.args = buildChromiumArgs(finalConfig);
    }
    return launchOptions;
}

/**
 * Launches the requested browser.
 *
 * @param {string} browserType - 'firefox', 'webkit', 'chrome', 'chromium'; anything else launches Chromium.
 * @param {object} launchOptions - Options produced by `buildLaunchOptions`.
 * @param {string|null} localChromePath - Detected local Chrome executable, if any.
 * @returns {Promise<object>} The launched Playwright browser.
 * @throws {Error} When 'chrome' is requested but no local Chrome was detected.
 */
async function launchBrowser(browserType, launchOptions, localChromePath) {
    switch (browserType) {
        case 'firefox':
            return playwright_1.firefox.launch(launchOptions);
        case 'webkit':
            return playwright_1.webkit.launch(launchOptions);
        case 'chrome':
            if (!localChromePath) {
                throw new Error('Local Chrome not available. Please install Google Chrome or use Chromium instead.');
            }
            return playwright_1.chromium.launch(launchOptions);
        case 'chromium':
        default:
            return playwright_1.chromium.launch(launchOptions);
    }
}

/**
 * Builds the options for `browser.newContext()`.
 *
 * @param {object} finalConfig - Merged browser configuration.
 * @returns {object} Context options (without `storageState`, which the caller adds).
 */
function buildContextOptions(finalConfig) {
    return {
        userAgent: finalConfig.userAgent || DEFAULT_USER_AGENT,
        viewport: finalConfig.viewport || { width: 1366, height: 768 },
        locale: 'en-US',
        timezoneId: 'America/New_York',
        // Better persistence for sites like LinkedIn
        acceptDownloads: true,
        ignoreHTTPSErrors: true,
        bypassCSP: true,
        javaScriptEnabled: true,
        permissions: ['geolocation', 'notifications'],
        // Preserve service workers for better auth persistence
        serviceWorkers: 'allow'
    };
}

/**
 * Init script installed into every page of a context to mask automation signals.
 *
 * This function is handed to `context.addInitScript`, so it runs in the browser:
 * it must stay self-contained and must not reference anything from Node scope.
 */
function applyStealthPatches() {
    // @ts-ignore - This runs in browser context
    Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined,
    });
    // @ts-ignore - This runs in browser context
    window.chrome = {
        runtime: {},
        loadTimes: () => { },
        csi: () => { },
        app: {}
    };
    // @ts-ignore - This runs in browser context
    Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5],
    });
    // @ts-ignore - This runs in browser context
    Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en'],
    });
    // @ts-ignore - This runs in browser context
    // Fix for LinkedIn detection
    Object.defineProperty(navigator, 'permissions', {
        get: () => ({
            query: () => Promise.resolve({ state: 'granted' })
        })
    });
    // @ts-ignore - This runs in browser context
    // Ensure consistent hardware concurrency
    Object.defineProperty(navigator, 'hardwareConcurrency', {
        get: () => 8
    });
    // @ts-ignore - This runs in browser context
    // Ensure WebGL vendor/renderer consistency
    try {
        // @ts-ignore
        const getParameter = WebGLRenderingContext.prototype.getParameter;
        // @ts-ignore
        WebGLRenderingContext.prototype.getParameter = function (parameter) {
            if (parameter === 37445) {
                return 'Intel Inc.';
            }
            if (parameter === 37446) {
                return 'Intel Iris OpenGL Engine';
            }
            // @ts-ignore
            return getParameter.apply(this, arguments);
        };
    }
    catch (e) {
        // WebGL might not be available in some contexts
    }
}

/**
 * Extracts a cleaned-up HTML snapshot of the first element matching `selector`.
 *
 * This function is handed to `page.evaluate`, so it runs in the browser: it must
 * stay self-contained and must not reference anything from Node scope.
 *
 * @param {string} selector - CSS selector of the element to extract.
 * @returns {{html: string, text: string, outerHTML: string}} Cleaned inner HTML
 *   (also returned as `outerHTML`) and the element's trimmed text content.
 * @throws {Error} When no element matches the selector.
 */
function extractCleanHtml(selector) {
    // @ts-ignore - This code runs in browser context
    const element = document.querySelector(selector);
    if (!element) {
        throw new Error(`Element not found: ${selector}`);
    }
    // Work on a deep clone so the live page is never modified.
    const clonedElement = element.cloneNode(true);
    // Remove script, style, and other unwanted elements
    const unwantedSelectors = [
        'script', 'style', 'noscript', 'iframe', 'svg', 'canvas',
        'meta', 'link', 'head', 'title', '[hidden]',
        'nav', 'aside', 'footer', 'header',
        '.ad', '.advertisement', '[role="banner"]', '[role="navigation"]',
        '[role="complementary"]', '.social-share', '.newsletter',
        '.popup', '.modal', '.tooltip', '.breadcrumb', '.pagination',
        '.sidebar', '.widget', 'button[type="button"]', 'input[type="hidden"]'
    ];
    unwantedSelectors.forEach((unwanted) => {
        try {
            clonedElement.querySelectorAll(unwanted).forEach((el) => el.remove());
        }
        catch (e) {
            // Continue if selector is invalid
        }
    });
    // Remove elements that are not visible or have no content.
    // NOTE(review): the clone is not attached to the document, so computed styles may
    // not reflect the page's stylesheet rules - confirm this filter behaves as intended.
    Array.from(clonedElement.querySelectorAll('*')).forEach((el) => {
        // @ts-ignore - This code runs in browser context
        const style = window.getComputedStyle ? window.getComputedStyle(el) : null;
        const hasVisibleContent = el.textContent && el.textContent.trim().length > 0;
        const hasImages = el.tagName === 'IMG' || el.querySelector('img');
        const isHidden = style && (style.display === 'none' ||
            style.visibility === 'hidden' ||
            style.opacity === '0' ||
            (style.position === 'absolute' && (style.left === '-9999px' || style.top === '-9999px')));
        if (isHidden) {
            el.remove();
        }
        else if (!hasVisibleContent && !hasImages && el.children.length === 0) {
            // Remove empty elements with no content
            el.remove();
        }
    });
    // Clean up attributes that add noise; a trailing '*' means "prefix match".
    const attrsToRemove = [
        'class', 'id', 'style', 'data-*', 'onclick', 'onload', 'onerror',
        'role', 'aria-*', 'tabindex', 'contenteditable', 'draggable',
        'spellcheck', 'translate', 'dir', 'lang', 'title'
    ];
    Array.from(clonedElement.querySelectorAll('*')).forEach((el) => {
        Array.from(el.attributes || []).forEach((attr) => {
            const attrName = attr.name.toLowerCase();
            const shouldRemove = attrsToRemove.some((pattern) => pattern.endsWith('*')
                ? attrName.startsWith(pattern.slice(0, -1))
                : attrName === pattern);
            if (shouldRemove) {
                try {
                    el.removeAttribute(attr.name);
                }
                catch (e) {
                    // Continue if attribute removal fails
                }
            }
        });
    });
    // Remove empty paragraphs and divs
    const emptyElements = clonedElement.querySelectorAll('p:empty, div:empty, span:empty, h1:empty, h2:empty, h3:empty, h4:empty, h5:empty, h6:empty');
    emptyElements.forEach((el) => el.remove());
    // Normalize whitespace in text content
    Array.from(clonedElement.querySelectorAll('*')).forEach((el) => {
        if (el.childNodes) {
            Array.from(el.childNodes).forEach((node) => {
                if (node.nodeType === 3) { // Text node
                    node.textContent = node.textContent
                        .replace(/\s+/g, ' ') // Multiple spaces to single space
                        .replace(/^\s+|\s+$/g, ''); // Trim start/end whitespace
                }
            });
        }
    });
    // Clean up the HTML string itself
    const cleanHtml = clonedElement.innerHTML
        // Remove excessive whitespace between tags
        .replace(/>\s+</g, '><')
        // Remove whitespace at start/end of lines
        .replace(/^\s+|\s+$/gm, '')
        // Normalize line breaks
        .replace(/\n\s*\n/g, '\n')
        .trim();
    return {
        html: cleanHtml,
        text: element.textContent?.trim() || '',
        outerHTML: cleanHtml
    };
}

/**
 * Post-processes generated markdown: strips comments, empty list items/headers,
 * punctuation-only lines and redundant whitespace.
 *
 * @param {string} markdown - Raw markdown produced by the HTML converter.
 * @returns {string} The cleaned markdown.
 */
function cleanMarkdown(markdown) {
    return markdown
        // Remove HTML comments that might have slipped through
        .replace(/<!--[\s\S]*?-->/g, '')
        // Remove empty markdown elements
        .replace(/^\s*\*\s*$/gm, '') // Empty list items
        .replace(/^\s*-\s*$/gm, '') // Empty list items with dashes
        .replace(/^\s*\d+\.\s*$/gm, '') // Empty numbered list items
        .replace(/^\s*#+\s*$/gm, '') // Empty headers
        // Remove standalone formatting that adds no value
        .replace(/^\*\*\s*\*\*$/gm, '') // Empty bold
        .replace(/^\*\s*\*$/gm, '') // Empty italic
        // Remove lines that are just punctuation or symbols
        .replace(/^[^\w\s]*$/gm, '')
        // Remove lines with only whitespace
        .replace(/^\s*$/gm, '')
        // Remove excessive whitespace and newlines
        .replace(/\n\s*\n\s*\n+/g, '\n\n') // Max 2 consecutive line breaks
        .replace(/\n{3,}/g, '\n\n') // No more than 2 consecutive newlines
        // Remove trailing spaces from each line
        .replace(/[ \t]+$/gm, '')
        // Remove leading spaces from each line (except for code blocks and lists)
        // NOTE(review): the last lookahead alternative is a single space here; confirm
        // whether a four-space (code block) indent was intended.
        .replace(/^[ \t]+(?![*\-+]|[0-9]+\.| )/gm, '')
        // Clean up any remaining multiple spaces within lines
        .replace(/ {2,}/g, ' ')
        // Remove tabs and replace with single space
        .replace(/\t+/g, ' ')
        // Remove any remaining empty lines at the start or end
        .replace(/^\n+/, '')
        .replace(/\n+$/, '')
        // Final trim to remove any leading/trailing whitespace
        .trim();
}

/**
 * Converts the first element matching `selector` on `page` to markdown.
 *
 * @param {object} page - Playwright page.
 * @param {string} selector - CSS selector of the element to convert.
 * @returns {Promise<{markdown: string, text: string, html: string}>} Markdown plus
 *   the element's text and cleaned HTML.
 */
async function convertElementToMarkdown(page, selector) {
    const elementData = await page.evaluate(extractCleanHtml, selector);
    const nhm = new node_html_markdown_1.NodeHtmlMarkdown({
        // Ignore these elements completely
        ignore: [
            'script', 'style', 'noscript', 'iframe', 'svg', 'canvas',
            'nav', 'aside', 'footer', 'header', 'button', 'form',
            'input', 'select', 'textarea', 'label', 'fieldset', 'legend'
        ],
        useLinkReferenceDefinitions: false,
        // More aggressive text extraction
        textReplace: [
            // Remove multiple consecutive whitespaces
            [/\s+/g, ' '],
            // Remove leading/trailing whitespace from lines
            [/^\s+|\s+$/gm, ''],
            // Remove multiple consecutive line breaks
            [/\n\s*\n\s*\n/g, '\n\n']
        ]
    });
    const markdown = cleanMarkdown(nhm.translate(elementData.html));
    return {
        markdown: markdown.trim(),
        text: elementData.text,
        html: elementData.html
    };
}

/**
 * Resolves the `text` of a 'scroll' action (other than 'bottom') to coordinates.
 *
 * Accepted forms: 'top', a JSON object such as '{"x":0,"y":300}', or a number
 * (optionally followed by a unit, e.g. '300' or '300px') meaning a Y offset.
 * JSON is only used when it parses to an object, so a plain number is treated
 * as a Y offset rather than as an option bag without x/y.
 *
 * @param {string|number} text - The action's scroll specification.
 * @returns {{x: number, y: number}} Absolute scroll position; y defaults to 500
 *   when the text is not recognised.
 */
function parseScrollTarget(text) {
    if (text === 'top') {
        return { x: 0, y: 0 };
    }
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch {
        parsed = undefined;
    }
    if (parsed !== null && typeof parsed === 'object') {
        return { x: parsed.x || 0, y: parsed.y || 0 };
    }
    const y = toInt(text);
    return { x: 0, y: Number.isNaN(y) ? 500 : y };
}

/**
 * Owns the Playwright browser sessions of this server: creation, persistence of
 * their storage state to the cookies directory, action execution and cleanup.
 */
class BrowserManager {
    /**
     * @param {object} [deviceRegistry] - Optional registry; its `getBrowserConfig()`
     *   supplies device-level defaults that session configs are merged over.
     */
    constructor(deviceRegistry) {
        this.sessions = new Map();
        this.localChromePath = null;
        this.savingStates = new Set(); // Track sessions currently being saved
        this.deviceRegistry = null;
        this.cookiesDir = DataDirectory_1.DataDirectory.getCookiesDir();
        this.deviceRegistry = deviceRegistry || null;
        // Constructors cannot await: handle the rejection here so it does not
        // surface as an unhandled promise rejection.
        this.ensureDirectories().catch((error) => {
            console.error('Failed to prepare data directories:', error);
        });
        this.detectLocalChrome();
        // Cleanup old sessions every 5 minutes; the handle is kept so the timer can be cleared.
        this.cleanupTimer = setInterval(() => {
            this.cleanupOldSessions().catch((error) => {
                console.error('Session cleanup failed:', error);
            });
        }, CLEANUP_INTERVAL_MS);
    }

    /** Ensures the data directories exist (delegates to `DataDirectory`). */
    async ensureDirectories() {
        await DataDirectory_1.DataDirectory.ensureDirectories();
    }

    /**
     * Computes the file name (inside the cookies directory) used to persist a
     * session's storage state.
     *
     * A session id is treated as explicit when it starts with 'session_', contains
     * '_' or is longer than 20 characters; explicit ids map straight to
     * `<sessionId>.json`. Any other id is prefixed with the sanitised user id and
     * session name when both are available. Note that the length rule also covers
     * ids such as canonical 36-character UUIDs.
     *
     * @param {string} sessionId - Session identifier.
     * @param {{userId?: string, sessionName?: string}} [userContext] - Optional owner context.
     * @returns {string} A bare file name, never containing directory components.
     */
    generateCookieFileName(sessionId, userContext) {
        const isExplicitSessionId = sessionId.startsWith('session_') ||
            sessionId.includes('_') ||
            sessionId.length > 20;
        let fileName = `${sessionId}.json`;
        if (!isExplicitSessionId && userContext?.userId && userContext?.sessionName) {
            // Create isolated session storage: userId_sessionName_sessionId.json
            const sanitizedUserId = userContext.userId.replace(/[^a-zA-Z0-9]/g, '_');
            const sanitizedSessionName = userContext.sessionName.replace(/[^a-zA-Z0-9]/g, '_');
            fileName = `${sanitizedUserId}_${sanitizedSessionName}_${sessionId}.json`;
        }
        // Strip any directory part so an id like '../x' cannot escape the cookies directory.
        return path_1.default.basename(fileName);
    }

    /**
     * Looks for a locally installed Google Chrome and records its path in
     * `localChromePath` (null when none is usable).
     */
    detectLocalChrome() {
        try {
            const chromePath = locateChromeCandidate();
            if (chromePath) {
                // Throws when the candidate does not exist, which lands in the catch below.
                fs_1.accessSync(chromePath);
                this.localChromePath = chromePath;
                console.log('Local Chrome detected at:', chromePath);
            }
        }
        catch (error) {
            console.log('Local Chrome not found, will use bundled Chromium');
            this.localChromePath = null;
        }
    }

    /**
     * Creates (or replaces) a browser session.
     *
     * @param {string} sessionId - Session identifier; an existing session with this id is closed first.
     * @param {object} [config={}] - Session configuration; overrides device configuration.
     * @param {object} [userContext] - Optional owner context, used for cookie file naming.
     * @returns {Promise<object>} The session record ({ id, browser, context, page, createdAt, lastUsed, userContext }).
     * @throws {Error} When the browser cannot be launched or the context/page cannot be created.
     */
    async createSession(sessionId, config = {}, userContext) {
        // Close existing session if it exists
        if (this.sessions.has(sessionId)) {
            await this.closeSession(sessionId);
        }
        // Merge device configuration with session configuration (session config takes precedence)
        const deviceConfig = this.deviceRegistry?.getBrowserConfig() || {};
        const finalConfig = { ...deviceConfig, ...config };
        // Proxy passwords are masked so credentials never reach the logs.
        console.log('🔧 Using merged browser configuration:', {
            device: redactProxyPassword(deviceConfig),
            session: redactProxyPassword(config),
            final: redactProxyPassword(finalConfig)
        });
        const browserType = finalConfig.browserType || 'chromium';
        const launchOptions = buildLaunchOptions(finalConfig, browserType, this.localChromePath);
        const browser = await launchBrowser(browserType, launchOptions, this.localChromePath);
        try {
            const contextOptions = buildContextOptions(finalConfig);
            // Load cookies if they exist - use user-scoped session storage
            const cookieFileName = this.generateCookieFileName(sessionId, userContext);
            const cookiesPath = path_1.default.join(this.cookiesDir, cookieFileName);
            try {
                const cookiesData = await promises_1.default.readFile(cookiesPath, 'utf-8');
                contextOptions.storageState = JSON.parse(cookiesData);
                console.log(`✅ Loaded session state from: ${cookieFileName}`);
            }
            catch (error) {
                if (error?.code !== 'ENOENT') {
                    // The file exists but is unreadable or corrupt: say so before starting fresh.
                    console.warn(`Could not load session state from ${cookieFileName}:`, error);
                }
                console.log(`📝 Creating new session state: ${cookieFileName}`);
            }
            const context = await browser.newContext(contextOptions);
            if (finalConfig.timeout) {
                // Timeouts are not newContext() options; they are set on the context itself.
                const timeoutMs = toInt(finalConfig.timeout);
                if (!Number.isNaN(timeoutMs)) {
                    context.setDefaultTimeout(timeoutMs);
                    context.setDefaultNavigationTimeout(timeoutMs);
                }
            }
            // Anti-detection measures - Enhanced for LinkedIn
            await context.addInitScript(applyStealthPatches);
            const page = await context.newPage();
            const now = new Date();
            const session = {
                id: sessionId,
                browser,
                context,
                page,
                createdAt: now,
                lastUsed: now,
                userContext
            };
            this.sessions.set(sessionId, session);
            console.log(`Created browser session: ${sessionId}`);
            return session;
        }
        catch (error) {
            // Do not leak the launched browser process when setup fails part-way.
            try {
                await browser.close();
            }
            catch (closeError) {
                console.warn(`Failed to close browser after session setup error (${sessionId}):`, closeError);
            }
            throw error;
        }
    }

    /**
     * Looks up a session and marks it as used.
     *
     * @param {string} sessionId - Session identifier.
     * @returns {Promise<object|undefined>} The session record, or undefined when unknown.
     */
    async getSession(sessionId) {
        const session = this.sessions.get(sessionId);
        if (session) {
            session.lastUsed = new Date();
        }
        return session;
    }

    /**
     * Persists a session's storage state to its cookie file. Failures are logged,
     * not thrown; a save already in progress for the same session is skipped.
     *
     * @param {string} sessionId - Session identifier.
     * @returns {Promise<void>}
     */
    async saveSession(sessionId) {
        const session = this.sessions.get(sessionId);
        if (!session) {
            return;
        }
        // Prevent multiple simultaneous saves of the same session
        if (this.savingStates.has(sessionId)) {
            console.log(`⏳ Session ${sessionId} already being saved, skipping...`);
            return;
        }
        this.savingStates.add(sessionId);
        try {
            const storageState = await session.context.storageState();
            const cookieFileName = this.generateCookieFileName(sessionId, session.userContext);
            const cookiesPath = path_1.default.join(this.cookiesDir, cookieFileName);
            await promises_1.default.writeFile(cookiesPath, JSON.stringify(storageState, null, 2));
            console.log(`💾 Saved session state to: ${cookieFileName}`);
        }
        catch (error) {
            console.error(`Failed to save session ${sessionId}:`, error);
        }
        finally {
            this.savingStates.delete(sessionId);
        }
    }

    /**
     * Closes a session's browser and forgets the session. Failures are logged,
     * not thrown; the session is removed from the registry either way.
     *
     * @param {string} sessionId - Session identifier.
     * @returns {Promise<void>}
     */
    async closeSession(sessionId) {
        const session = this.sessions.get(sessionId);
        if (!session) {
            return;
        }
        try {
            // Small delay to let any pending operations complete before closing
            await new Promise((resolve) => setTimeout(resolve, 100));
            // Close browser immediately without additional saves to prevent race conditions
            await session.browser.close();
            console.log(`Closed browser session: ${sessionId}`);
        }
        catch (error) {
            console.error(`Failed to close session ${sessionId}:`, error);
        }
        finally {
            // Drop the entry even when close() failed, otherwise it would be retried on
            // every cleanup pass. Only delete if the id has not been re-used meanwhile.
            if (this.sessions.get(sessionId) === session) {
                this.sessions.delete(sessionId);
            }
        }
    }

    /**
     * Executes an action against a session's page.
     *
     * @param {string} sessionId - Session identifier.
     * @param {object} action - Action descriptor; `type` selects the operation,
     *   `captureScreenshots` adds before/after screenshots to the result.
     * @returns {Promise<object>} `{ success: true, result, screenshots }` on success,
     *   `{ success: false, error }` when the session is unknown or the action throws.
     */
    async executeAction(sessionId, action) {
        const session = this.sessions.get(sessionId);
        if (!session) {
            return { success: false, error: 'Session not found' };
        }
        session.lastUsed = new Date();
        const { page, context } = session;
        try {
            // Take before screenshot if requested
            let beforeScreenshot = null;
            if (action.captureScreenshots) {
                try {
                    const screenshot = await page.screenshot({ fullPage: false });
                    beforeScreenshot = screenshot.toString('base64');
                }
                catch (error) {
                    console.warn('Failed to capture before screenshot:', error);
                }
            }
            let result = {};
            if (action.type === 'playwrightCode' && action.code) {
                // SECURITY: this compiles and runs caller-supplied code with full Node
                // privileges. It is the purpose of the 'playwrightCode' action, so only
                // trusted, authenticated callers must be able to reach it.
                const AsyncFunction = Object.getPrototypeOf(async function () { }).constructor;
                const executeCode = new AsyncFunction('page', 'context', 'variables', action.code);
                result = await executeCode(page, context, action.variables || {});
            }
            else {
                // Handle standard action types
                result = await this.executeStandardAction(page, action);
            }
            // Take after screenshot if requested
            let afterScreenshot = null;
            if (action.captureScreenshots) {
                try {
                    // Give the page a moment to settle before capturing.
                    await new Promise((resolve) => setTimeout(resolve, 500));
                    const screenshot = await page.screenshot({ fullPage: false });
                    afterScreenshot = screenshot.toString('base64');
                }
                catch (error) {
                    console.warn('Failed to capture after screenshot:', error);
                }
            }
            // Note: Session saving is handled by FlowExecutor to prevent multiple saves
            return {
                success: true,
                result,
                screenshots: action.captureScreenshots ? {
                    before: beforeScreenshot,
                    after: afterScreenshot
                } : undefined
            };
        }
        catch (error) {
            // Non-Error throwables have no `message`; fall back to their string form.
            const message = error?.message ?? String(error);
            return {
                success: false,
                error: `Action execution failed: ${message}`
            };
        }
    }

    /**
     * Executes one of the built-in action types on a page.
     *
     * @param {object} page - Playwright page.
     * @param {object} action - Action descriptor (`type` plus type-specific fields
     *   such as `url`, `selector`, `text`, `timeout`, `script`, `fullPage`).
     * @returns {Promise<object>} Action-specific data, or `{}` for actions without a result.
     * @throws {Error} For an unknown action type, a missing required selector, or
     *   any failure raised by Playwright.
     */
    async executeStandardAction(page, action) {
        switch (action.type) {
            case 'navigate':
                await page.goto(action.url, { waitUntil: 'domcontentloaded' });
                break;
            case 'click':
                await page.click(action.selector);
                break;
            case 'type':
                await page.fill(action.selector, action.text);
                break;
            case 'waitForSelector':
                await page.waitForSelector(action.selector, { timeout: action.timeout || 30000 });
                break;
            case 'screenshot': {
                const screenshot = await page.screenshot({ fullPage: action.fullPage || false });
                return { screenshot: screenshot.toString('base64') };
            }
            case 'evaluate': {
                const evalResult = await page.evaluate(action.script);
                return { result: evalResult };
            }
            case 'waitForNavigation':
                await page.waitForNavigation();
                break;
            case 'press':
                await page.keyboard.press(action.text || 'Enter');
                break;
            case 'hover':
                await page.hover(action.selector);
                break;
            case 'focus':
                await page.focus(action.selector);
                break;
            case 'scroll':
                if (action.selector) {
                    await page.locator(action.selector).scrollIntoViewIfNeeded();
                }
                else if (action.text === 'bottom') {
                    await page.evaluate(() => {
                        // @ts-ignore - This runs in browser context
                        window.scrollTo(0, document.body.scrollHeight);
                    });
                }
                else if (action.text) {
                    const target = parseScrollTarget(action.text);
                    await page.evaluate(({ x, y }) => {
                        // @ts-ignore - This runs in browser context
                        window.scrollTo(x, y);
                    }, target);
                }
                else {
                    await page.evaluate(() => {
                        // @ts-ignore - This runs in browser context
                        window.scrollBy(0, 500);
                    });
                }
                break;
            case 'reload':
                await page.reload();
                break;
            case 'goBack':
                await page.goBack();
                break;
            case 'goForward':
                await page.goForward();
                break;
            case 'waitForTimeout': {
                const requested = toInt(action.text || action.timeout?.toString() || '2000');
                // A non-numeric value falls back to the 2000 ms default.
                await page.waitForTimeout(Number.isNaN(requested) ? 2000 : requested);
                break;
            }
            case 'elementToMarkdown':
                if (!action.selector) {
                    throw new Error('Selector is required for elementToMarkdown action');
                }
                return convertElementToMarkdown(page, action.selector);
            default:
                throw new Error(`Unknown action type: ${action.type}`);
        }
        return {};
    }

    /**
     * Closes every session that has not been used for longer than `maxAge`.
     *
     * @param {number} [maxAge=3600000] - Maximum idle time in milliseconds.
     * @returns {Promise<void>}
     */
    async cleanupOldSessions(maxAge = 3600000) {
        const now = new Date();
        for (const [sessionId, session] of this.sessions.entries()) {
            if (now.getTime() - session.lastUsed.getTime() > maxAge) {
                console.log(`Cleaning up old session: ${sessionId}`);
                await this.closeSession(sessionId);
            }
        }
    }

    /** @returns {string[]} Ids of all sessions currently registered. */
    getActiveSessions() {
        return Array.from(this.sessions.keys());
    }

    /** @returns {number} Number of sessions currently registered. */
    getSessionCount() {
        return this.sessions.size;
    }
}
exports.BrowserManager = BrowserManager;
//# sourceMappingURL=BrowserManager.js.map