// @monostate/browsernative-client
// Version: (unspecified)
// Browser Native client SDK for web scraping and content extraction API
// 466 lines (408 loc) • 15.3 kB
// JavaScript
/**
* Browser Native Client SDK
*
* A lightweight JavaScript client for the Browser Native web scraping API.
* Works in browsers, Node.js, and edge environments.
*/
export class BrowserNativeClient {
  /**
   * Create a client bound to a single API key.
   *
   * @param {string} apiKey - API key sent in the `x-api-key` header of every request
   * @param {object} [options] - Client configuration
   * @param {string} [options.baseUrl='https://bnca-api.fly.dev'] - API origin
   * @param {number} [options.timeout=30000] - Per-attempt timeout in milliseconds
   * @param {number} [options.retries=2] - Extra attempts after the first one; `0` disables
   *   retries. Anything that is not a non-negative integer falls back to 2.
   * @param {number} [options.retryDelay=1000] - Base delay in milliseconds for the exponential
   *   backoff (delay before retry k is `retryDelay * 2^(k-1)`)
   * @param {boolean} [options.verbose=false] - Log request progress to the console
   * @throws {Error} If `apiKey` is missing
   */
  constructor(apiKey, options = {}) {
    if (!apiKey) {
      throw new Error('API key is required. Get one at https://bnca.monostate.ai');
    }
    const { retries, retryDelay } = options;
    this.apiKey = apiKey;
    this.baseUrl = options.baseUrl || 'https://bnca-api.fly.dev';
    this.timeout = options.timeout || 30000;
    // `||` would turn an explicit `retries: 0` into 2, making it impossible to disable retries.
    this.retries = Number.isInteger(retries) && retries >= 0 ? retries : 2;
    this.retryDelay = Number.isFinite(retryDelay) && retryDelay >= 0 ? retryDelay : 1000;
    this.verbose = options.verbose || false;
  }

  /**
   * Scrape a webpage and extract structured content
   * @param {string} url - The URL to scrape
   * @param {object} options - Scraping options; spread into the request payload as-is.
   *   `options.includeScreenshot` sets the payload's `screenshot` flag unless
   *   `options.screenshot` is given explicitly (later keys win).
   * @returns {Promise<object>} Scraping result
   */
  async scrape(url, options = {}) {
    const payload = {
      url,
      screenshot: options.includeScreenshot || false,
      ...options,
    };
    return this._makeRequest('/scrapeurl', payload);
  }

  /**
   * Scrape a webpage and take a screenshot
   * @param {string} url - The URL to scrape
   * @param {object} options - Screenshot options; spread into the payload after
   *   `screenshot: true`, so an explicit `options.screenshot` overrides it
   * @returns {Promise<object>} Screenshot result with base64 image
   */
  async screenshot(url, options = {}) {
    const payload = {
      url,
      screenshot: true,
      ...options,
    };
    return this._makeRequest('/scrapeurl', payload);
  }

  /**
   * Quick screenshot capture - optimized for speed
   * @param {string} url - The URL to capture
   * @param {object} options - Screenshot options
   * @returns {Promise<object>} Screenshot result
   */
  async quickshot(url, options = {}) {
    const payload = {
      url,
      ...options,
    };
    return this._makeRequest('/quickshot', payload);
  }

  /**
   * Extract content and answer questions using AI
   * @param {string} url - The URL to analyze
   * @param {string} question - The question to answer
   * @param {object} options - Analysis options
   * @returns {Promise<object>} AI analysis result
   */
  async analyze(url, question, options = {}) {
    const payload = {
      url,
      question,
      screenshot: options.includeScreenshot || false,
      ...options,
    };
    return this._makeRequest('/aireply', payload);
  }

  /**
   * Scrape multiple URLs with concurrency control
   *
   * Results are appended in completion order, which may differ from input order.
   *
   * @param {string[]} urls - URLs to scrape
   * @param {object} options - Bulk scraping options; also forwarded to `scrape` for each URL
   * @param {number} [options.concurrency=5] - Maximum number of simultaneous scrapes
   * @param {boolean} [options.continueOnError=true] - When `false`, the returned promise
   *   rejects on the first failed URL and no further URLs are started
   * @param {function} [options.progressCallback] - Called after each processed URL with
   *   `{ processed, total, percentage }`
   * @returns {Promise<object>} Aggregated results: `{ results, stats }`
   */
  async bulkScrape(urls, options = {}) {
    const targets = [...urls];
    const requested = Number(options.concurrency) || 5;
    // At least one worker, never more workers than URLs.
    const workerCount = Math.min(Math.max(1, Math.floor(requested)), targets.length);
    const continueOnError = options.continueOnError !== false;
    const results = [];
    const startTime = Date.now();
    let processed = 0;
    let cursor = 0;
    let aborted = false;

    const runWorker = async () => {
      while (!aborted && cursor < targets.length) {
        const url = targets[cursor++];
        let result;
        let thrown = null;
        try {
          result = await this.scrape(url, options);
        } catch (error) {
          thrown = error;
          result = { success: false, error: error.message };
        }
        results.push({ url, ...result });

        // `scrape` reports failures as `{ success: false }` instead of throwing, so the
        // abort-on-error contract has to look at the result, not only at exceptions.
        if (result.success === false && !continueOnError) {
          aborted = true;
          throw thrown ?? new Error(result.error || 'Request failed');
        }

        processed++;
        if (options.progressCallback) {
          options.progressCallback({
            processed,
            total: targets.length,
            percentage: (processed / targets.length) * 100,
          });
        }
      }
    };

    await Promise.all(Array.from({ length: workerCount }, () => runWorker()));

    const successful = results.filter((entry) => entry.success).length;
    const totalTime = Date.now() - startTime;
    return {
      results,
      stats: {
        total: targets.length,
        successful,
        failed: targets.length - successful,
        totalTime,
        // Avoid NaN (0 / 0) when called with an empty list.
        averageTime: targets.length > 0 ? Math.round(totalTime / targets.length) : 0,
      },
    };
  }

  /**
   * Get account usage statistics
   *
   * NOTE(review): `days` is accepted but currently not forwarded to the API; the request is
   * a plain GET to `/stats`. Confirm the expected query parameter before wiring it up.
   *
   * @param {number} days - Number of days to fetch (max 30)
   * @returns {Promise<object>} Usage statistics
   */
  async getUsage(days = 30) {
    return this._makeRequest('/stats', {}, 'GET');
  }

  /**
   * Create a persistent browser session via WebSocket
   *
   * The WebSocket URL is derived from `baseUrl` by swapping the leading `http` for `ws`
   * (so `https` becomes `wss`); the API key and mode are sent as query parameters.
   *
   * @param {object} options - Session options
   * @param {'auto'|'headless'|'visual'|'computer-use'} options.mode - Browser mode (default: 'auto')
   * @param {number} [options.screenWidth] - Screen width for computer-use mode
   * @param {number} [options.screenHeight] - Screen height for computer-use mode
   * @returns {Promise<BrowserSession>} Connected browser session
   */
  async createSession(options = {}) {
    const wsUrl = this.baseUrl.replace(/^http/, 'ws');
    const mode = options.mode || 'auto';
    const params = new URLSearchParams({
      apiKey: this.apiKey,
      mode,
    });
    if (options.screenWidth) params.set('screenWidth', String(options.screenWidth));
    if (options.screenHeight) params.set('screenHeight', String(options.screenHeight));
    const url = `${wsUrl}/session?${params}`;
    const session = new BrowserSession(url, { verbose: this.verbose });
    await session.connect();
    return session;
  }

  /**
   * Check API health and your account status
   * @returns {Promise<object>} Health check result
   */
  async healthCheck() {
    return this._makeRequest('/health', {}, 'GET');
  }

  /**
   * Make an authenticated request to the API
   *
   * Never rejects: failures are reported as `{ success: false, error, responseTime }`.
   * Every failed attempt except a timeout (`AbortError`) is retried up to `this.retries`
   * times with exponential backoff based on `this.retryDelay`.
   *
   * @private
   * @param {string} endpoint - Path appended to `baseUrl`
   * @param {object} [payload] - JSON body; ignored for GET requests
   * @param {string} [method='POST'] - HTTP method
   * @param {string} [queryParams=''] - Raw query string (including `?`) appended to the URL
   * @returns {Promise<object>} `{ success: true, data, responseTime, attempt }` on success
   */
  async _makeRequest(endpoint, payload = {}, method = 'POST', queryParams = '') {
    const url = `${this.baseUrl}${endpoint}${queryParams}`;
    const startTime = Date.now();
    const maxAttempts = this.retries + 1;
    let lastError;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Fresh controller per attempt: an aborted signal cannot be reused.
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), this.timeout);
      try {
        if (this.verbose) {
          console.log(`Browser Native: ${method} ${url} (attempt ${attempt})`);
        }
        const options = {
          method,
          headers: {
            'x-api-key': this.apiKey,
            'Content-Type': 'application/json',
            'User-Agent': 'Browser Native Client SDK/2.0.0',
          },
          signal: controller.signal,
        };
        if (method !== 'GET') {
          options.body = JSON.stringify(payload);
        }

        const response = await fetch(url, options);
        const responseTime = Date.now() - startTime;
        if (!response.ok) {
          const errorData = await response.json().catch(() => ({ error: 'Unknown error' }));
          throw new Error(`API Error (${response.status}): ${errorData.error || response.statusText}`);
        }
        const data = await response.json();
        if (this.verbose) {
          console.log(`Browser Native: Request completed in ${responseTime}ms`);
        }
        return {
          success: true,
          data,
          responseTime,
          attempt,
        };
      } catch (error) {
        lastError = error;
      } finally {
        // Always release the timer, otherwise a failed attempt leaves it pending and keeps
        // the process alive for up to `timeout` ms.
        clearTimeout(timeoutId);
      }

      // Only reached when the attempt failed. Timeouts are deliberately not retried.
      const canRetry = attempt < maxAttempts && lastError?.name !== 'AbortError';
      if (!canRetry) break;

      const delay = Math.pow(2, attempt - 1) * this.retryDelay; // Exponential backoff
      if (this.verbose) {
        console.log(`Browser Native: Attempt ${attempt} failed, retrying in ${delay}ms...`);
      }
      await new Promise((resolve) => setTimeout(resolve, delay));
    }

    // All retries failed
    return {
      success: false,
      error: lastError?.message || 'Request failed',
      responseTime: Date.now() - startTime,
    };
  }
}
/**
 * One-shot scrape: builds a throwaway client and scrapes a single URL with it.
 * The same `options` object is handed to both the client constructor and `scrape`.
 * @param {string} url - The URL to scrape
 * @param {string} apiKey - Your API key
 * @param {object} options - Additional options
 * @returns {Promise<object>} Scraping result
 */
export async function quickScrape(url, apiKey, options = {}) {
  return new BrowserNativeClient(apiKey, options).scrape(url, options);
}
/**
 * One-shot screenshot: builds a throwaway client and calls its `screenshot` method.
 * The same `options` object is handed to both the client constructor and `screenshot`.
 * @param {string} url - The URL to capture
 * @param {string} apiKey - Your API key
 * @param {object} options - Additional options
 * @returns {Promise<object>} Screenshot result
 */
export async function quickScreenshot(url, apiKey, options = {}) {
  return new BrowserNativeClient(apiKey, options).screenshot(url, options);
}
/**
 * One-shot AI analysis: builds a throwaway client and asks a question about a page.
 * The same `options` object is handed to both the client constructor and `analyze`.
 * @param {string} url - The URL to analyze
 * @param {string} question - The question to answer
 * @param {string} apiKey - Your API key
 * @param {object} options - Additional options
 * @returns {Promise<object>} Analysis result
 */
export async function quickAnalyze(url, question, apiKey, options = {}) {
  return new BrowserNativeClient(apiKey, options).analyze(url, question, options);
}
/**
 * One-shot quick capture: builds a throwaway client and calls its `quickshot` method.
 * The same `options` object is handed to both the client constructor and `quickshot`.
 * @param {string} url - The URL to capture
 * @param {string} apiKey - Your API key
 * @param {object} options - Additional options
 * @returns {Promise<object>} Screenshot result
 */
export async function quickShot(url, apiKey, options = {}) {
  return new BrowserNativeClient(apiKey, options).quickshot(url, options);
}
/**
 * One-shot bulk scrape: builds a throwaway client and delegates to its `bulkScrape` method.
 * The same `options` object is handed to both the client constructor and `bulkScrape`.
 * @param {string[]} urls - URLs to scrape
 * @param {string} apiKey - Your API key
 * @param {object} options - Bulk options (concurrency, continueOnError, progressCallback)
 * @returns {Promise<object>} Aggregated results
 */
export async function bulkScrape(urls, apiKey, options = {}) {
  return new BrowserNativeClient(apiKey, options).bulkScrape(urls, options);
}
// ── Browser Session (WebSocket) ──────────────────────────────────────────────
/**
 * A persistent remote browser session driven over a WebSocket.
 *
 * Each action is sent as a JSON frame `{ id, action, ...params }`; the reply carrying the
 * same `id` settles the promise returned by the action method.
 */
class BrowserSession {
  /**
   * @param {string} url - Full WebSocket URL of the session endpoint
   * @param {object} [options]
   * @param {boolean} [options.verbose=false] - Log session lifecycle events to the console
   */
  constructor(url, options = {}) {
    this._url = url;
    this._ws = null;
    this._nextId = 1;
    this._pending = new Map(); // id → { resolve, reject }
    this._verbose = options.verbose || false;
    this._eventHandlers = {};
    this.sessionId = null;
    this.backend = null;
    this.vncUrl = null;
  }

  /**
   * Open the socket and wait for the server's `connected` message.
   *
   * Uses the global `WebSocket` when present, otherwise dynamically imports the `ws` package.
   *
   * @returns {Promise<BrowserSession>} This session, once the handshake message arrives
   * @throws {Error} If no WebSocket implementation is available, the socket errors, or the
   *   socket closes before the handshake completes
   */
  async connect() {
    let WebSocketImpl = typeof WebSocket !== 'undefined' ? WebSocket : null;
    if (!WebSocketImpl) {
      // Node.js — try dynamic import. Only the import itself is guarded, so a failure while
      // constructing the socket is not misreported as a missing package.
      try {
        const ws = await import('ws');
        WebSocketImpl = ws.default;
      } catch {
        throw new Error('WebSocket not available. Install "ws" package for Node.js: npm install ws');
      }
    }
    return new Promise((resolve, reject) => {
      this._ws = new WebSocketImpl(this._url);
      this._wireEvents(resolve, reject);
    });
  }

  /**
   * Attach message/error/close handlers to the current socket.
   * @param {function} onConnected - Called with this session when `connected` is received
   * @param {function} onError - Called with an Error when the connection fails
   */
  _wireEvents(onConnected, onError) {
    const socket = this._ws;

    socket.onmessage = (event) => {
      const raw = typeof event.data === 'string' ? event.data : event.data.toString();
      let msg;
      try { msg = JSON.parse(raw); } catch { return; }
      // Valid JSON that is not an object (e.g. `null`) carries nothing we can dispatch on.
      if (msg === null || typeof msg !== 'object') return;

      if (msg.type === 'connected') {
        this.sessionId = msg.sessionId;
        this.backend = msg.backend;
        this.vncUrl = msg.vncUrl || null;
        if (this._verbose) console.log(`Browser Native: Session ${msg.sessionId} connected (${msg.backend})`);
        onConnected(this);
        return;
      }
      if (msg.type === 'fallback') {
        this.backend = msg.to;
        if (this._verbose) console.log(`Browser Native: Fallback ${msg.from} → ${msg.to} (${msg.reason})`);
        if (this._eventHandlers.fallback) this._eventHandlers.fallback(msg);
        return;
      }
      if (msg.type === 'closed') {
        if (this._verbose) console.log(`Browser Native: Session closed (${msg.reason})`);
        if (this._eventHandlers.closed) this._eventHandlers.closed(msg);
        return;
      }

      // Match response to pending request by id
      if (msg.id != null && this._pending.has(msg.id)) {
        const { resolve, reject } = this._pending.get(msg.id);
        this._pending.delete(msg.id);
        if (msg.type === 'error') {
          reject(new Error(msg.message));
        } else {
          if (msg.backend) this.backend = msg.backend;
          resolve(msg.data);
        }
      }
    };

    socket.onerror = (err) => {
      onError(new Error(err.message || 'WebSocket connection failed'));
    };

    socket.onclose = (event) => {
      const closedError = new Error(`Session closed: ${event.reason || 'unknown'}`);
      // Reject all pending requests
      for (const [, { reject }] of this._pending) {
        reject(closedError);
      }
      this._pending.clear();
      // If the socket closes before the handshake, connect() would otherwise never settle.
      // After a successful connect this call is a no-op (the promise is already resolved).
      onError(closedError);
    };
  }

  /**
   * Send one action frame and wait for the matching reply.
   * @param {string} action - Action name understood by the server
   * @param {object} [params] - Extra fields merged into the frame
   * @returns {Promise<*>} The `data` field of the reply
   */
  _send(action, params = {}) {
    return new Promise((resolve, reject) => {
      // readyState 1 is OPEN in the WebSocket API.
      if (!this._ws || this._ws.readyState !== 1) {
        reject(new Error('Session not connected'));
        return;
      }
      const id = this._nextId++;
      this._pending.set(id, { resolve, reject });
      try {
        this._ws.send(JSON.stringify({ id, action, ...params }));
      } catch (error) {
        // Do not leave an entry behind that no reply will ever settle.
        this._pending.delete(id);
        reject(error);
      }
    });
  }

  /**
   * Register the handler for a session event (`fallback` or `closed`).
   * A later registration for the same event replaces the earlier one.
   * @param {string} event - Event name
   * @param {function} handler - Called with the raw server message
   * @returns {BrowserSession} This session, for chaining
   */
  on(event, handler) {
    this._eventHandlers[event] = handler;
    return this;
  }

  // Selector-based actions. Each forwards its arguments to the server under the action
  // name shown and resolves with the reply's `data`.
  async goto(url) { return this._send('goto', { url }); }
  async click(selector, options = {}) { return this._send('click', { selector, ...options }); }
  async type(selector, text, options = {}) { return this._send('type', { selector, text, ...options }); }
  async scroll(direction = 'down', amount = 500) { return this._send('scroll', { direction, amount }); }
  async hover(selector) { return this._send('hover', { selector }); }
  async select(selector, ...values) { return this._send('select', { selector, values }); }
  async pressKey(key) { return this._send('pressKey', { key }); }
  async goBack() { return this._send('goBack'); }
  async goForward() { return this._send('goForward'); }
  async screenshot(options = {}) { return this._send('screenshot', options); }
  async getPageState(options = {}) { return this._send('getPageState', options); }
  async extractContent() { return this._send('extractContent'); }
  async waitFor(selector, timeout) { return this._send('waitFor', { selector, timeout }); }
  async evaluate(fn) { return this._send('evaluate', { fn }); }
  async getCookies() { return this._send('getCookies'); }
  async setCookies(cookies) { return this._send('setCookies', { cookies }); }

  // Coordinate-based actions (computer-use mode)
  async mouseMove(x, y) { return this._send('mouseMove', { x, y }); }
  async clickAt(x, y, button = 'left') { return this._send('clickAt', { x, y, button }); }
  async doubleClickAt(x, y, button = 'left') { return this._send('doubleClickAt', { x, y, button }); }
  async drag(startX, startY, endX, endY) { return this._send('drag', { startX, startY, endX, endY }); }
  async scrollAt(x, y, direction, amount) { return this._send('scrollAt', { x, y, direction, amount }); }
  async typeText(text) { return this._send('typeText', { text }); }
  async getCursorPosition() { return this._send('getCursorPosition'); }
  async getScreenSize() { return this._send('getScreenSize'); }

  /**
   * Close the socket, if one is open, and drop the reference to it.
   */
  close() {
    if (this._ws) {
      this._ws.close();
      this._ws = null;
    }
  }
}
// Named export of the session class, which is declared above without an inline `export`.
export { BrowserSession };
// Default export for CommonJS compatibility
export default BrowserNativeClient;