UNPKG

n8n-nodes-brightdata

Version:

Community node to work with BrightData service for scraping purposes

708 lines (706 loc) 38 kB
"use strict";
// Sentry debug-id registration snippet (build-tool generated; kept verbatim).
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="c58717a3-b4b3-58c9-a1b6-98e4732b77f7")}catch(e){}}();
Object.defineProperty(exports, "__esModule", { value: true });
exports.BrightData = void 0;
require("./instrument");
const n8n_workflow_1 = require("n8n-workflow");
const SearchFunctions_1 = require("./SearchFunctions");
const GenericFunctions_1 = require("./GenericFunctions");
const WebUnlockerDescription_1 = require("./WebUnlockerDescription");
const MarketplaceDatasetDescription_1 = require("./MarketplaceDatasetDescription");
const WebScrapperDescription_1 = require("./WebScrapperDescription");

/**
 * Calls `brightdataApiRequest` with `ctx` as `this` and wraps any failure in a
 * `NodeOperationError` bound to the current node.
 *
 * The arguments are forwarded untouched (same count, same order) so optional
 * trailing parameters such as the query string keep whatever default the
 * request helper gives them.
 *
 * @param {object} ctx - The execute-functions context (`this` inside `execute`).
 * @param {...*} requestArgs - method, path, body and (optionally) query string.
 * @returns {Promise<*>} Whatever `brightdataApiRequest` resolves to.
 * @throws {NodeOperationError} When the request helper rejects.
 */
async function apiRequest(ctx, ...requestArgs) {
    try {
        return await GenericFunctions_1.brightdataApiRequest.call(ctx, ...requestArgs);
    }
    catch (error) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), error);
    }
}

/**
 * Reads the `snapshot_id` parameter and rejects empty values.
 *
 * The id is interpolated into the request path, so an empty value would
 * produce a malformed URL; failing early gives the user a clear message.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @returns {*} The snapshot id.
 * @throws {NodeOperationError} When the parameter is falsy.
 */
function requireSnapshotId(ctx, itemIndex) {
    const snapshotId = ctx.getNodeParameter('snapshot_id', itemIndex);
    if (!snapshotId) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Snapshot ID is required');
    }
    return snapshotId;
}

/**
 * Reads the `dataset_id` parameter and returns its `.value` property.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @returns {*} The `value` property of the `dataset_id` parameter.
 * @throws {NodeOperationError} When the parameter is `undefined` or `null`.
 */
function requireDatasetId(ctx, itemIndex) {
    const dataset = ctx.getNodeParameter('dataset_id', itemIndex);
    // `== null` also covers `null`, which would otherwise crash on `.value`.
    if (dataset == null) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Dataset ID is required');
    }
    return dataset.value;
}

/**
 * Parses the JSON text of the `urls` parameter into a request body.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {string} rawUrls - JSON text as entered by the user.
 * @returns {*} The parsed (truthy) JSON value.
 * @throws {NodeOperationError} When the text is not valid JSON or parses to a
 *   falsy value (`null`, `0`, `""`, `false`).
 */
function parseUrlsJson(ctx, rawUrls) {
    let parsed;
    try {
        parsed = JSON.parse(rawUrls);
    }
    catch (error) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Invalid JSON format for URLs');
    }
    if (!parsed) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'URL is required');
    }
    return parsed;
}

/**
 * Reads the two filename parameters shared by every delivery target.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @returns {{template: *, extension: *}} The `filename` section of a delivery body.
 */
function readFilename(ctx, itemIndex) {
    return {
        template: ctx.getNodeParameter('filename_template', itemIndex),
        extension: ctx.getNodeParameter('filename_extension', itemIndex),
    };
}

/**
 * Builds the `{ deliver: {...} }` request body for a "deliver snapshot" call.
 *
 * Shared by the `webScrapper` and `marketplaceDataset` resources, which send
 * the same payload to different endpoints. An unrecognised delivery type
 * yields a body that contains only `deliver.type`.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @param {string} deliverType - Value of the `deliver_type` parameter.
 * @returns {{deliver: object}} The request body.
 */
function buildDeliverBody(ctx, itemIndex, deliverType) {
    const param = (name) => ctx.getNodeParameter(name, itemIndex);
    const deliver = { type: deliverType };
    switch (deliverType) {
        case 'webhook': {
            deliver.endpoint = param('endpoint');
            deliver.filename = readFilename(ctx, itemIndex);
            break;
        }
        case 's3': {
            deliver.bucket = param('bucket');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                'aws-access-key': param('aws-access-key'),
                'aws-secret-key': param('aws-secret-key'),
            };
            // The remaining S3 settings are only sent when the user filled them in.
            const region = param('region');
            if (region) {
                deliver.region = region;
            }
            const roleArn = param('role_arn');
            if (roleArn) {
                deliver.credentials.role_arn = roleArn;
            }
            const externalId = param('external_id');
            if (externalId) {
                deliver.credentials.external_id = externalId;
            }
            break;
        }
        case 'ali_oss': {
            deliver.bucket = param('bucket');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                'access-key': param('access-key'),
                'secret-key': param('secret-key'),
            };
            const region = param('region');
            if (region) {
                deliver.region = region;
            }
            break;
        }
        case 'gcs_pubsub': {
            deliver.bucket = param('bucket');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                client_email: param('client_email'),
                private_key: param('private_key'),
            };
            deliver.topic_id = param('topic_id');
            deliver.attributes = param('attributes');
            break;
        }
        case 'gcs': {
            deliver.bucket = param('bucket');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                client_email: param('client_email'),
                private_key: param('private_key'),
            };
            break;
        }
        case 'azure': {
            deliver.container = param('container');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                account: param('account'),
                key: param('key'),
                sas_token: param('sas_token'),
            };
            break;
        }
        case 'sftp': {
            deliver.host = param('host');
            deliver.port = param('port');
            deliver.path = param('path');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                username: param('username'),
                password: param('password'),
                ssh_key: param('ssh_key'),
                passphrase: param('passphrase'),
            };
            break;
        }
        case 'snowflake': {
            deliver.database = param('database');
            deliver.schema = param('schema');
            deliver.stage = param('stage');
            deliver.role = param('role');
            deliver.warehouse = param('warehouse');
            deliver.filename = readFilename(ctx, itemIndex);
            deliver.credentials = {
                account: param('credentials.account'),
                user: param('credentials.user'),
                password: param('credentials.password'),
            };
            break;
        }
        default:
            break;
    }
    return { deliver };
}

/**
 * Reads the query-string parameters used by both snapshot download endpoints.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @returns {{format: *, compress: *, batch_size: *, part: *}} Query string object.
 */
function readDownloadQuery(ctx, itemIndex) {
    return {
        format: ctx.getNodeParameter('format', itemIndex),
        compress: ctx.getNodeParameter('compress', itemIndex),
        batch_size: ctx.getNodeParameter('batch_size', itemIndex),
        part: ctx.getNodeParameter('part', itemIndex),
    };
}

/**
 * Builds the `filter` section of a "filter dataset" request from the
 * `filter_type` parameter and its companion fields.
 *
 * @param {object} ctx - The execute-functions context.
 * @param {number} itemIndex - Index of the input item being processed.
 * @param {string} filterType - Value of the `filter_type` parameter.
 * @returns {*} The filter, or `undefined` for an unrecognised filter type.
 * @throws {NodeOperationError} When a JSON-based filter cannot be parsed.
 */
function buildDatasetFilter(ctx, itemIndex, filterType) {
    if (filterType === 'filter_single') {
        const fieldName = ctx.getNodeParameter('field_name', itemIndex);
        let operator = ctx.getNodeParameter('field_operator', itemIndex);
        const fieldValue = ctx.getNodeParameter('field_value', itemIndex);
        // The node offers '==' as an option; the request is sent with '='.
        if (operator === '==') {
            operator = '=';
        }
        return { name: fieldName, operator: operator, value: fieldValue };
    }
    if (filterType === 'filters_group') {
        try {
            return JSON.parse(ctx.getNodeParameter('filters_group', itemIndex));
        }
        catch (error) {
            throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Invalid JSON format for filters group');
        }
    }
    if (filterType === 'csv_filter') {
        return ctx.getNodeParameter('csv_filter', itemIndex);
    }
    if (filterType === 'json_filter') {
        let jsonFilter;
        try {
            jsonFilter = JSON.parse(ctx.getNodeParameter('json_filter', itemIndex));
        }
        catch (error) {
            throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Invalid JSON format for JSON filter');
        }
        // Valid JSON that is not an object (e.g. `null`) has no operator/filters.
        if (jsonFilter === null || typeof jsonFilter !== 'object') {
            throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), 'Invalid JSON format for JSON filter');
        }
        return { operator: jsonFilter.operator, filters: jsonFilter.filters };
    }
    return undefined;
}

/**
 * Per-item handlers, keyed by resource and then operation.
 *
 * Each handler receives the execute-functions context and the item index,
 * performs exactly one API request, and resolves to an array of entries that
 * `execute` appends to the node output.
 */
const operationHandlers = {
    webScrapper: {
        async downloadSnapshot(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            const qs = readDownloadQuery(ctx, i);
            const responseData = await apiRequest(ctx, 'GET', `/datasets/v3/snapshot/${snapshotId}`, {}, qs);
            return [{ items: responseData }];
        },
        async monitorProgressSnapshot(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            return [await apiRequest(ctx, 'GET', `/datasets/v3/progress/${snapshotId}`, {})];
        },
        async getSnapshots(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            const qs = {
                dataset_id,
                status: ctx.getNodeParameter('status', i),
                skip: ctx.getNodeParameter('skip', i),
                limit: ctx.getNodeParameter('limit', i),
                from_date: ctx.getNodeParameter('from_date', i),
                to_date: ctx.getNodeParameter('to_date', i),
            };
            return [await apiRequest(ctx, 'GET', '/datasets/v3/snapshots', {}, qs)];
        },
        async scrapeByUrl(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            const rawUrls = ctx.getNodeParameter('urls', i);
            const format = ctx.getNodeParameter('format', i);
            const include_errors = ctx.getNodeParameter('include_errors', i);
            const body = parseUrlsJson(ctx, rawUrls);
            const qs = { dataset_id, format, include_errors };
            const responseData = await apiRequest(ctx, 'POST', '/datasets/v3/scrape', body, qs);
            if (Array.isArray(responseData)) {
                // One output entry per returned record.
                return responseData;
            }
            if (typeof responseData === 'string') {
                return [{ data: responseData }];
            }
            if (responseData !== null && typeof responseData === 'object') {
                // A single-object reply used to be dropped silently; forward it so
                // the workflow can see it.
                // NOTE(review): presumably a status/snapshot descriptor - confirm
                // against the Bright Data API documentation.
                return [responseData];
            }
            return [];
        },
        async triggerCollectionByUrl(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            const body = parseUrlsJson(ctx, ctx.getNodeParameter('urls', i));
            const notify = ctx.getNodeParameter('notify', i);
            const endpoint = ctx.getNodeParameter('endpoint', i);
            const qs = { dataset_id, endpoint, notify };
            return [await apiRequest(ctx, 'POST', '/datasets/v3/trigger', body, qs)];
        },
        async deliverSnapshot(ctx, i) {
            const notify = ctx.getNodeParameter('notify', i);
            const snapshotId = requireSnapshotId(ctx, i);
            const deliverType = ctx.getNodeParameter('deliver_type', i);
            const body = buildDeliverBody(ctx, i, deliverType);
            const qs = { notify };
            return [await apiRequest(ctx, 'POST', `/datasets/v3/deliver/${snapshotId}`, body, qs)];
        },
    },
    marketplaceDataset: {
        async listDatasets(ctx) {
            const responseData = await apiRequest(ctx, 'GET', '/datasets/list', {});
            return [{ items: responseData }];
        },
        async filterDataset(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            const records_limit = ctx.getNodeParameter('records_limit', i);
            const filterType = ctx.getNodeParameter('filter_type', i);
            const body = { records_limit, dataset_id };
            const filter = buildDatasetFilter(ctx, i, filterType);
            // Leave `filter` off the body entirely for an unrecognised filter type.
            if (filter !== undefined) {
                body.filter = filter;
            }
            return [await apiRequest(ctx, 'POST', '/datasets/filter', body)];
        },
        async getDatasetMetadata(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            return [await apiRequest(ctx, 'GET', `/datasets/${dataset_id}/metadata`, {})];
        },
        async getSnapshotContent(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            const qs = readDownloadQuery(ctx, i);
            const responseData = await apiRequest(ctx, 'GET', `/datasets/snapshots/${snapshotId}/download`, {}, qs);
            return [{ items: responseData }];
        },
        async getSnapshotMetadata(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            return [await apiRequest(ctx, 'GET', `/datasets/snapshots/${snapshotId}`, {})];
        },
        async getSnapshotParts(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            return [await apiRequest(ctx, 'GET', `/datasets/snapshots/${snapshotId}/parts`, {})];
        },
        async listSnapshots(ctx, i) {
            const dataset_id = requireDatasetId(ctx, i);
            const view_id = ctx.getNodeParameter('view_id', i);
            const status = ctx.getNodeParameter('status', i);
            const qs = { dataset_id, view_id, status };
            return [await apiRequest(ctx, 'GET', '/datasets/snapshots', {}, qs)];
        },
        async deliverSnapshot(ctx, i) {
            const snapshotId = requireSnapshotId(ctx, i);
            const deliverType = ctx.getNodeParameter('deliver_type', i);
            const body = buildDeliverBody(ctx, i, deliverType);
            return [await apiRequest(ctx, 'POST', `/datasets/snapshots/${snapshotId}/deliver`, body)];
        },
    },
    webUnlocker: {
        async request(ctx, i) {
            const zoneData = ctx.getNodeParameter('zone', i);
            const countryData = ctx.getNodeParameter('country', i);
            const zone = zoneData.value;
            const country = countryData.value;
            const method = ctx.getNodeParameter('method', i);
            const url = ctx.getNodeParameter('url', i);
            const format = ctx.getNodeParameter('format', i);
            const body = { zone, country, method, url, format };
            return [await apiRequest(ctx, 'POST', '/request', body)];
        },
    },
};

/**
 * Looks up the handler for a resource/operation pair.
 *
 * Own-property checks keep inherited names such as `constructor` from being
 * mistaken for handlers.
 *
 * @param {*} resource - Value of the `resource` parameter.
 * @param {*} operation - Value of the `operation` parameter.
 * @returns {Function|undefined} The handler, or `undefined` when none matches.
 */
function resolveHandler(resource, operation) {
    const hasOwn = Object.prototype.hasOwnProperty;
    if (!hasOwn.call(operationHandlers, resource)) {
        return undefined;
    }
    const resourceHandlers = operationHandlers[resource];
    return hasOwn.call(resourceHandlers, operation) ? resourceHandlers[operation] : undefined;
}

/**
 * n8n node exposing three resources (Marketplace Dataset, Web Scrapper and
 * Web Unlocker), each backed by calls made through `brightdataApiRequest`.
 */
class BrightData {
    constructor() {
        this.description = {
            displayName: 'BrightData',
            name: 'brightData',
            icon: 'file:brightdatasquared.svg',
            group: ['transform'],
            version: 1,
            subtitle: '={{$parameter["operation"] + ": " + $parameter["resource"]}}',
            description: 'Interact with Bright Data to scrape websites or use existing datasets from the marketplace to generate adapted snapshots',
            defaults: {
                name: 'BrightData',
            },
            usableAsTool: true,
            inputs: ["main"],
            outputs: ["main"],
            credentials: [
                {
                    name: 'brightdataApi',
                    required: true,
                },
            ],
            properties: [
                {
                    displayName: 'Resource',
                    name: 'resource',
                    type: 'options',
                    noDataExpression: true,
                    options: [
                        {
                            name: 'Marketplace Dataset',
                            value: 'marketplaceDataset',
                        },
                        {
                            name: 'Web Scrapper',
                            value: 'webScrapper',
                        },
                        {
                            name: 'Web Unlocker',
                            value: 'webUnlocker',
                        },
                    ],
                    default: 'webUnlocker',
                },
                ...WebUnlockerDescription_1.webUnlockerOperations,
                ...WebUnlockerDescription_1.webUnlockerFields,
                ...MarketplaceDatasetDescription_1.marketplaceDatasetOperations,
                ...MarketplaceDatasetDescription_1.marketplaceDatasetFields,
                ...WebScrapperDescription_1.webScrapperOperations,
                ...WebScrapperDescription_1.webScrapperFields,
            ],
        };
        this.methods = {
            listSearch: {
                getActiveZones: SearchFunctions_1.getActiveZones,
                getCountries: SearchFunctions_1.getCountries,
                getDataSets: SearchFunctions_1.getDataSets,
            },
        };
    }

    /**
     * Runs the selected operation once per input item and collects the results.
     *
     * `resource` and `operation` are read from the first item only; every
     * other parameter is read per item. An unrecognised resource/operation
     * pair performs no requests and produces empty output.
     *
     * @returns {Promise<*>} The result of `prepareOutputData` applied to the
     *   collected entries.
     * @throws {NodeOperationError} On invalid parameters or a failed API request.
     */
    async execute() {
        const items = this.getInputData();
        const returnData = [];
        const resource = this.getNodeParameter('resource', 0);
        const operation = this.getNodeParameter('operation', 0);
        const handler = resolveHandler(resource, operation);
        if (handler !== undefined) {
            for (let i = 0; i < items.length; i++) {
                const entries = await handler(this, i);
                // Append one by one: spreading a very large result into push()
                // can exceed the engine's argument limit.
                for (const entry of entries) {
                    returnData.push(entry);
                }
            }
        }
        return this.prepareOutputData(this.helpers.returnJsonArray(returnData));
    }
}
exports.BrightData = BrightData;
//# sourceMappingURL=BrightData.node.js.map
//# debugId=c58717a3-b4b3-58c9-a1b6-98e4732b77f7