n8n-nodes-brightdata
Version:
Community node for working with the Bright Data service for web scraping
708 lines (706 loc) • 38 kB
JavaScript
"use strict";
// Minified debug-ID registration snippet (appears to be emitted by Sentry build tooling —
// presumably not meant to be edited by hand; confirm against the build config).
// It resolves a global object (window / global / globalThis / self, else a throwaway {}),
// reads the `stack` string of a freshly created Error, and — if a stack is available —
// stores this file's debug ID in the global `_sentryDebugIds` map using that stack string
// as the key. Any exception raised while doing so is deliberately swallowed.
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="c58717a3-b4b3-58c9-a1b6-98e4732b77f7")}catch(e){}}();
Object.defineProperty(exports, "__esModule", { value: true });
exports.BrightData = void 0;
require("./instrument");
const n8n_workflow_1 = require("n8n-workflow");
const SearchFunctions_1 = require("./SearchFunctions");
const GenericFunctions_1 = require("./GenericFunctions");
const WebUnlockerDescription_1 = require("./WebUnlockerDescription");
const MarketplaceDatasetDescription_1 = require("./MarketplaceDatasetDescription");
const WebScrapperDescription_1 = require("./WebScrapperDescription");
/*
 * Module-level helpers for BrightData.execute().
 *
 * They are plain functions taking the execution context (`ctx`) explicitly rather than
 * class methods: execute() uses `this.getInputData`, `this.getNodeParameter`,
 * `this.getNode` and `this.helpers`, none of which are defined on the BrightData class,
 * so `this` inside execute() is supplied by the caller and is not a BrightData instance.
 */

/**
 * Returns `table[key]` only when `key` is an own property, so inherited names such as
 * `constructor` or `toString` can never be mistaken for a resource or operation.
 */
function ownValue(table, key) {
    if (table === undefined || table === null) {
        return undefined;
    }
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Builds a NodeOperationError for the current node, tagged with the failing item index.
 * @param {object} ctx - execution context (provides `getNode`)
 * @param {number} itemIndex - index of the input item being processed
 * @param {Error|string} error - underlying error or message
 */
function operationError(ctx, itemIndex, error) {
    return new n8n_workflow_1.NodeOperationError(ctx.getNode(), error, { itemIndex });
}

/**
 * Calls `brightdataApiRequest` with `ctx` as `this` and converts any failure into a
 * NodeOperationError, matching the error style used by every operation of this node.
 * @returns {Promise<*>} whatever `brightdataApiRequest` resolves to
 * @throws NodeOperationError when the request rejects
 */
async function apiRequest(ctx, itemIndex, method, path, body, qs) {
    try {
        return await GenericFunctions_1.brightdataApiRequest.call(ctx, method, path, body, qs);
    }
    catch (error) {
        throw operationError(ctx, itemIndex, error);
    }
}

/**
 * Reads the `dataset_id` parameter and returns its `.value`.
 * @throws NodeOperationError when the parameter is undefined or null
 */
function readDatasetId(ctx, itemIndex) {
    const dataset = ctx.getNodeParameter('dataset_id', itemIndex);
    // `== null` covers both undefined and null; a null locator used to crash on `.value`.
    if (dataset == null) {
        throw operationError(ctx, itemIndex, 'Dataset ID is required');
    }
    return dataset.value;
}

/**
 * Reads the `snapshot_id` parameter.
 * @throws NodeOperationError when it is empty, since it is interpolated into the URL path
 */
function readSnapshotId(ctx, itemIndex) {
    const snapshotId = ctx.getNodeParameter('snapshot_id', itemIndex);
    if (!snapshotId) {
        throw operationError(ctx, itemIndex, 'Snapshot ID is required');
    }
    return snapshotId;
}

/**
 * Parses the raw `urls` parameter (a JSON string) into the request body.
 * @throws NodeOperationError on malformed JSON or when the parsed value is falsy
 */
function parseUrlsBody(ctx, itemIndex, rawUrls) {
    let body;
    try {
        body = JSON.parse(rawUrls);
    }
    catch (error) {
        throw operationError(ctx, itemIndex, 'Invalid JSON format for URLs');
    }
    if (!body) {
        throw operationError(ctx, itemIndex, 'URL is required');
    }
    return body;
}

/**
 * Converts a `/datasets/v3/scrape` response into the list of entries to output.
 * Arrays contribute one entry per element, strings are wrapped as `{ data }`, and a
 * single non-array object is emitted as-is (it used to be dropped silently).
 * Any other value (null, undefined, other primitives) produces no entries.
 */
function toScrapeEntries(responseData) {
    if (Array.isArray(responseData)) {
        return responseData;
    }
    if (typeof responseData === 'string') {
        return [{ data: responseData }];
    }
    if (responseData !== null && typeof responseData === 'object') {
        return [responseData];
    }
    return [];
}

/**
 * Builds the `{ deliver: {...} }` request body for a "deliver snapshot" call from the
 * node parameters belonging to the chosen delivery type. Shared by both resources that
 * expose the deliverSnapshot operation. Unknown types yield `{ deliver: { type } }` only.
 * @param {object} ctx - execution context (provides `getNodeParameter`)
 * @param {number} itemIndex - index of the input item being processed
 * @param {string} deliverType - value of the `deliver_type` parameter
 */
function buildDeliverPayload(ctx, itemIndex, deliverType) {
    const param = (name) => ctx.getNodeParameter(name, itemIndex);
    const filename = () => ({
        template: param('filename_template'),
        extension: param('filename_extension'),
    });
    const deliver = { type: deliverType };
    switch (deliverType) {
        case 'webhook': {
            deliver.endpoint = param('endpoint');
            deliver.filename = filename();
            break;
        }
        case 's3': {
            deliver.bucket = param('bucket');
            deliver.filename = filename();
            deliver.credentials = {
                'aws-access-key': param('aws-access-key'),
                'aws-secret-key': param('aws-secret-key'),
            };
            // Optional settings are only sent when they have a non-empty value.
            const region = param('region');
            if (region) {
                deliver.region = region;
            }
            const roleArn = param('role_arn');
            if (roleArn) {
                deliver.credentials.role_arn = roleArn;
            }
            const externalId = param('external_id');
            if (externalId) {
                deliver.credentials.external_id = externalId;
            }
            break;
        }
        case 'ali_oss': {
            deliver.bucket = param('bucket');
            deliver.filename = filename();
            deliver.credentials = {
                'access-key': param('access-key'),
                'secret-key': param('secret-key'),
            };
            const region = param('region');
            if (region) {
                deliver.region = region;
            }
            break;
        }
        case 'gcs_pubsub': {
            deliver.bucket = param('bucket');
            deliver.filename = filename();
            deliver.credentials = {
                client_email: param('client_email'),
                private_key: param('private_key'),
            };
            deliver.topic_id = param('topic_id');
            deliver.attributes = param('attributes');
            break;
        }
        case 'gcs': {
            deliver.bucket = param('bucket');
            deliver.filename = filename();
            deliver.credentials = {
                client_email: param('client_email'),
                private_key: param('private_key'),
            };
            break;
        }
        case 'azure': {
            deliver.container = param('container');
            deliver.filename = filename();
            deliver.credentials = {
                account: param('account'),
                key: param('key'),
                sas_token: param('sas_token'),
            };
            break;
        }
        case 'sftp': {
            deliver.host = param('host');
            deliver.port = param('port');
            deliver.path = param('path');
            deliver.filename = filename();
            deliver.credentials = {
                username: param('username'),
                password: param('password'),
                ssh_key: param('ssh_key'),
                passphrase: param('passphrase'),
            };
            break;
        }
        case 'snowflake': {
            deliver.database = param('database');
            deliver.schema = param('schema');
            deliver.stage = param('stage');
            deliver.role = param('role');
            deliver.warehouse = param('warehouse');
            deliver.filename = filename();
            deliver.credentials = {
                account: param('credentials.account'),
                user: param('credentials.user'),
                password: param('credentials.password'),
            };
            break;
        }
        default:
            break;
    }
    return { deliver };
}

/*
 * Operation handlers. Each one processes a single input item: it reads that item's
 * parameters, performs one API request and returns the array of entries to append to
 * the node output.
 */

/** Handlers for resource `webScrapper` (endpoints under `/datasets/v3`). */
const WEB_SCRAPPER_OPERATIONS = {
    async downloadSnapshot(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        const qs = {
            format: ctx.getNodeParameter('format', i),
            compress: ctx.getNodeParameter('compress', i),
            batch_size: ctx.getNodeParameter('batch_size', i),
            part: ctx.getNodeParameter('part', i),
        };
        const responseData = await apiRequest(ctx, i, 'GET', `/datasets/v3/snapshot/${snapshotId}`, {}, qs);
        return [{ items: responseData }];
    },
    async monitorProgressSnapshot(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        return [await apiRequest(ctx, i, 'GET', `/datasets/v3/progress/${snapshotId}`, {})];
    },
    async getSnapshots(ctx, i) {
        const qs = {
            dataset_id: readDatasetId(ctx, i),
            status: ctx.getNodeParameter('status', i),
            skip: ctx.getNodeParameter('skip', i),
            limit: ctx.getNodeParameter('limit', i),
            from_date: ctx.getNodeParameter('from_date', i),
            to_date: ctx.getNodeParameter('to_date', i),
        };
        return [await apiRequest(ctx, i, 'GET', '/datasets/v3/snapshots', {}, qs)];
    },
    async scrapeByUrl(ctx, i) {
        const dataset_id = readDatasetId(ctx, i);
        const rawUrls = ctx.getNodeParameter('urls', i);
        const format = ctx.getNodeParameter('format', i);
        const include_errors = ctx.getNodeParameter('include_errors', i);
        const body = parseUrlsBody(ctx, i, rawUrls);
        const qs = { dataset_id, format, include_errors };
        const responseData = await apiRequest(ctx, i, 'POST', '/datasets/v3/scrape', body, qs);
        return toScrapeEntries(responseData);
    },
    async triggerCollectionByUrl(ctx, i) {
        const dataset_id = readDatasetId(ctx, i);
        const body = parseUrlsBody(ctx, i, ctx.getNodeParameter('urls', i));
        const notify = ctx.getNodeParameter('notify', i);
        const endpoint = ctx.getNodeParameter('endpoint', i);
        const qs = { dataset_id, endpoint, notify };
        return [await apiRequest(ctx, i, 'POST', '/datasets/v3/trigger', body, qs)];
    },
    async deliverSnapshot(ctx, i) {
        const notify = ctx.getNodeParameter('notify', i);
        const snapshotId = readSnapshotId(ctx, i);
        const body = buildDeliverPayload(ctx, i, ctx.getNodeParameter('deliver_type', i));
        return [await apiRequest(ctx, i, 'POST', `/datasets/v3/deliver/${snapshotId}`, body, { notify })];
    },
};

/** Handlers for resource `marketplaceDataset` (endpoints under `/datasets`). */
const MARKETPLACE_DATASET_OPERATIONS = {
    async listDatasets(ctx, i) {
        // Takes no per-item parameters; the list is still requested once per input item.
        const responseData = await apiRequest(ctx, i, 'GET', '/datasets/list', {});
        return [{ items: responseData }];
    },
    async filterDataset(ctx, i) {
        const dataset_id = readDatasetId(ctx, i);
        const records_limit = ctx.getNodeParameter('records_limit', i);
        const filterType = ctx.getNodeParameter('filter_type', i);
        const body = { records_limit, dataset_id };
        if (filterType === 'filter_single') {
            const name = ctx.getNodeParameter('field_name', i);
            const selectedOperator = ctx.getNodeParameter('field_operator', i);
            const value = ctx.getNodeParameter('field_value', i);
            // The node offers '==' but the request is sent with '=' for equality.
            const operator = selectedOperator === '==' ? '=' : selectedOperator;
            body.filter = { name, operator, value };
        }
        else if (filterType === 'filters_group') {
            const rawGroup = ctx.getNodeParameter('filters_group', i);
            try {
                body.filter = JSON.parse(rawGroup);
            }
            catch (error) {
                throw operationError(ctx, i, 'Invalid JSON format for filters group');
            }
        }
        else if (filterType === 'csv_filter') {
            body.filter = ctx.getNodeParameter('csv_filter', i);
        }
        else if (filterType === 'json_filter') {
            const rawFilter = ctx.getNodeParameter('json_filter', i);
            let jsonFilter;
            try {
                jsonFilter = JSON.parse(rawFilter);
            }
            catch (error) {
                throw operationError(ctx, i, 'Invalid JSON format for JSON filter');
            }
            // Valid JSON that is not an object (e.g. `null`) has no operator/filters to read.
            if (jsonFilter === null || typeof jsonFilter !== 'object') {
                throw operationError(ctx, i, 'Invalid JSON format for JSON filter');
            }
            body.filter = {
                operator: jsonFilter.operator,
                filters: jsonFilter.filters,
            };
        }
        return [await apiRequest(ctx, i, 'POST', '/datasets/filter', body)];
    },
    async getDatasetMetadata(ctx, i) {
        const datasetId = readDatasetId(ctx, i);
        return [await apiRequest(ctx, i, 'GET', `/datasets/${datasetId}/metadata`, {})];
    },
    async getSnapshotContent(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        const qs = {
            format: ctx.getNodeParameter('format', i),
            compress: ctx.getNodeParameter('compress', i),
            batch_size: ctx.getNodeParameter('batch_size', i),
            part: ctx.getNodeParameter('part', i),
        };
        const responseData = await apiRequest(ctx, i, 'GET', `/datasets/snapshots/${snapshotId}/download`, {}, qs);
        return [{ items: responseData }];
    },
    async getSnapshotMetadata(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        return [await apiRequest(ctx, i, 'GET', `/datasets/snapshots/${snapshotId}`, {})];
    },
    async getSnapshotParts(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        return [await apiRequest(ctx, i, 'GET', `/datasets/snapshots/${snapshotId}/parts`, {})];
    },
    async listSnapshots(ctx, i) {
        const qs = {
            dataset_id: readDatasetId(ctx, i),
            view_id: ctx.getNodeParameter('view_id', i),
            status: ctx.getNodeParameter('status', i),
        };
        return [await apiRequest(ctx, i, 'GET', '/datasets/snapshots', {}, qs)];
    },
    async deliverSnapshot(ctx, i) {
        const snapshotId = readSnapshotId(ctx, i);
        const body = buildDeliverPayload(ctx, i, ctx.getNodeParameter('deliver_type', i));
        return [await apiRequest(ctx, i, 'POST', `/datasets/snapshots/${snapshotId}/deliver`, body)];
    },
};

/** Handlers for resource `webUnlocker`. */
const WEB_UNLOCKER_OPERATIONS = {
    async request(ctx, i) {
        const zoneData = ctx.getNodeParameter('zone', i);
        const countryData = ctx.getNodeParameter('country', i);
        const body = {
            zone: zoneData.value,
            country: countryData.value,
            method: ctx.getNodeParameter('method', i),
            url: ctx.getNodeParameter('url', i),
            format: ctx.getNodeParameter('format', i),
        };
        return [await apiRequest(ctx, i, 'POST', '/request', body)];
    },
};

/** Dispatch table: resource value -> operation value -> per-item handler. */
const OPERATION_HANDLERS = {
    webScrapper: WEB_SCRAPPER_OPERATIONS,
    marketplaceDataset: MARKETPLACE_DATASET_OPERATIONS,
    webUnlocker: WEB_UNLOCKER_OPERATIONS,
};

/** Looks up the handler for a resource/operation pair; `undefined` when none is defined. */
function findOperationHandler(resource, operation) {
    return ownValue(ownValue(OPERATION_HANDLERS, resource), operation);
}

/**
 * n8n node definition for Bright Data.
 *
 * `description` declares the node's UI (resource selector plus the operation/field
 * definitions spread in from the three *Description modules) and `methods.listSearch`
 * exposes the search functions from SearchFunctions. `execute` performs the selected
 * operation once per input item.
 */
class BrightData {
    constructor() {
        this.description = {
            displayName: 'BrightData',
            name: 'brightData',
            icon: 'file:brightdatasquared.svg',
            group: ['transform'],
            version: 1,
            subtitle: '={{$parameter["operation"] + ": " + $parameter["resource"]}}',
            description: 'Interact with Bright Data to scrape websites or use existing datasets from the marketplace to generate adapted snapshots',
            defaults: {
                name: 'BrightData',
            },
            usableAsTool: true,
            inputs: ["main"],
            outputs: ["main"],
            credentials: [
                {
                    name: 'brightdataApi',
                    required: true,
                },
            ],
            properties: [
                {
                    displayName: 'Resource',
                    name: 'resource',
                    type: 'options',
                    noDataExpression: true,
                    options: [
                        {
                            name: 'Marketplace Dataset',
                            value: 'marketplaceDataset',
                        },
                        {
                            name: 'Web Scrapper',
                            value: 'webScrapper',
                        },
                        {
                            name: 'Web Unlocker',
                            value: 'webUnlocker',
                        },
                    ],
                    default: 'webUnlocker',
                },
                ...WebUnlockerDescription_1.webUnlockerOperations,
                ...WebUnlockerDescription_1.webUnlockerFields,
                ...MarketplaceDatasetDescription_1.marketplaceDatasetOperations,
                ...MarketplaceDatasetDescription_1.marketplaceDatasetFields,
                ...WebScrapperDescription_1.webScrapperOperations,
                ...WebScrapperDescription_1.webScrapperFields,
            ],
        };
        this.methods = {
            listSearch: {
                getActiveZones: SearchFunctions_1.getActiveZones,
                getCountries: SearchFunctions_1.getCountries,
                getDataSets: SearchFunctions_1.getDataSets,
            },
        };
    }

    /**
     * Runs the selected resource/operation once for every input item.
     *
     * `resource` and `operation` are read from item 0 and applied to all items; every
     * other parameter is read per item. When no handler exists for the selected
     * resource/operation pair, no request is made and the output is empty.
     *
     * @returns the result of `this.prepareOutputData(this.helpers.returnJsonArray(...))`
     *          applied to the collected response entries
     * @throws NodeOperationError when a required parameter is missing or malformed, or
     *         when an API request fails
     */
    async execute() {
        const items = this.getInputData();
        const returnData = [];
        const resource = this.getNodeParameter('resource', 0);
        const operation = this.getNodeParameter('operation', 0);
        const handler = findOperationHandler(resource, operation);
        if (handler !== undefined) {
            for (let i = 0; i < items.length; i++) {
                const entries = await handler(this, i);
                // Appended one by one: spreading a very large response into push()
                // could exceed the engine's argument limit.
                for (const entry of entries) {
                    returnData.push(entry);
                }
            }
        }
        return this.prepareOutputData(this.helpers.returnJsonArray(returnData));
    }
}
exports.BrightData = BrightData;
//# sourceMappingURL=BrightData.node.js.map
//# debugId=c58717a3-b4b3-58c9-a1b6-98e4732b77f7