dtamind-components
Version:
Apps integration for Dtamind. Contains nodes and credentials.
238 lines • 8.61 kB
JavaScript
;
/**
 * Interop helper for default imports: reuses an existing `__importDefault`
 * found on `this`, otherwise falls back to the local implementation below.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is flagged with `__esModule`, otherwise
 *   a wrapper object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules already flagged as ES modules carry their own `default` binding.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule` so interop helpers treat it as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the named export; the class is assigned to it after its definition below.
exports.OxylabsLoader = void 0;
// Dependencies: the document-loader base class, shared project utilities, and the HTTP client.
const base_1 = require("langchain/document_loaders/base");
const utils_1 = require("../../../src/utils");
const axios_1 = __importDefault(require("axios"));
/**
 * Document loader that submits a scraping request to the Oxylabs realtime API
 * and turns every entry of the response's `results` array into a document.
 */
class OxylabsLoader extends base_1.BaseDocumentLoader {
    /**
     * @param {object} loaderParams - Credentials (`username`, `password`) plus the
     *   request options read by `load()`: `query`, `source`, `geo_location`,
     *   `render`, `parse` and `user_agent_type`.
     */
    constructor(loaderParams) {
        super();
        this.params = loaderParams;
    }

    /**
     * POSTs `params` as JSON to the Oxylabs realtime endpoint using HTTP Basic auth.
     *
     * @param {object} params - Request payload; entries whose value is `null`,
     *   `undefined` or `''` are dropped before sending.
     * @returns {Promise<object>} The axios response object.
     * @throws {Error} When the API answers with an HTTP status of 400 or above.
     */
    async sendAPIRequest(params) {
        // Build a cleaned copy instead of reassigning the caller-supplied argument.
        const payload = Object.fromEntries(
            Object.entries(params).filter(([, value]) => value !== null && value !== '' && value !== undefined)
        );
        const auth = Buffer.from(`${this.params.username}:${this.params.password}`).toString('base64');
        const response = await axios_1.default.post('https://realtime.oxylabs.io/v1/queries', payload, {
            headers: {
                'Content-Type': 'application/json',
                'x-oxylabs-sdk': 'oxylabs-integration-dtamind/1.0.0 (1.0.0; 64bit)',
                Authorization: `Basic ${auth}`
            },
            // axios rejects non-2xx responses by default, which made the status
            // check below unreachable. Accept every status here so the
            // Oxylabs-specific error is the one that surfaces.
            validateStatus: () => true
        });
        if (response.status >= 400) {
            throw new Error(`Oxylabs: Failed to call Oxylabs API: ${response.status}`);
        }
        return response;
    }

    /**
     * Runs the configured request and maps the API results to documents.
     *
     * @returns {Promise<Array<{id: string, pageContent: string, metadata: object}>>}
     *   One document per result, with ids of the form `<job id>-<index>`.
     * @throws {Error} When the request fails or the response has no `results` array.
     */
    async load() {
        const { query, source, geo_location, render, parse, user_agent_type } = this.params;
        // NOTE(review): `render` and `parse` are forwarded exactly as configured
        // (booleans from the node UI) — confirm the API accepts these value types.
        const response = await this.sendAPIRequest({
            url: query,
            source,
            geo_location,
            render,
            parse,
            user_agent_type
        });
        const results = response.data?.results;
        if (!Array.isArray(results)) {
            throw new Error('Oxylabs: Unexpected API response: missing "results" array');
        }
        // Fall back to a placeholder rather than crashing when job info is absent.
        const jobId = response.data.job?.id ?? 'unknown';
        return results.map((result, index) => {
            const content = result?.content;
            // Content may be structured (non-string) data, presumably when parsing
            // is enabled; serialise it so `pageContent` is always a string and
            // downstream splitting/concatenation does not yield "[object Object]".
            let pageContent;
            if (typeof content === 'string') {
                pageContent = content;
            }
            else if (content === null || content === undefined) {
                pageContent = '';
            }
            else {
                pageContent = JSON.stringify(content);
            }
            return {
                id: `${jobId}-${index}`,
                pageContent,
                metadata: {}
            };
        });
    }
}
// Attach the loader class to the named export declared at the top of the module.
exports.OxylabsLoader = OxylabsLoader;
/**
 * Node definition for the Oxylabs document loader: declares the node's
 * metadata, credential, inputs and outputs, and runs the loader in `init`.
 */
class Oxylabs_DocumentLoaders {
    constructor() {
        // Expands [label, name] pairs into the `{ label, name }` objects used by `options` inputs.
        const toOptions = (pairs) => pairs.map(([label, name]) => ({ label, name }));

        this.label = 'Oxylabs';
        this.name = 'oxylabs';
        this.type = 'Document';
        this.icon = 'oxylabs.svg';
        this.version = 1.0;
        this.category = 'Document Loaders';
        this.description = 'Extract data from URLs using Oxylabs';
        this.baseClasses = [this.type];
        this.credential = {
            label: 'Oxylabs API',
            name: 'credential',
            type: 'credential',
            credentialNames: ['oxylabsApi']
        };
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                // NOTE(review): declared as required, yet `init` works without a
                // splitter — confirm whether this should be optional.
                optional: false
            },
            {
                label: 'Query',
                name: 'query',
                type: 'string',
                description: 'Website URL or query keyword.'
            },
            {
                label: 'Source',
                name: 'source',
                type: 'options',
                description: 'Target website to scrape.',
                options: toOptions([
                    ['Universal', 'universal'],
                    ['Google Search', 'google_search'],
                    ['Amazon Product', 'amazon_product'],
                    ['Amazon Search', 'amazon_search']
                ]),
                default: 'universal'
            },
            {
                label: 'Geolocation',
                name: 'geo_location',
                type: 'string',
                description: "Sets the proxy's geo location to retrieve data. Check Oxylabs documentation for more details.",
                optional: true
            },
            {
                label: 'Render',
                name: 'render',
                type: 'boolean',
                description: 'Enables JavaScript rendering when set to true.',
                optional: true,
                default: false
            },
            {
                label: 'Parse',
                name: 'parse',
                type: 'boolean',
                description: "Returns parsed data when set to true, as long as a dedicated parser exists for the submitted URL's page type.",
                optional: true,
                default: false
            },
            {
                label: 'User Agent Type',
                name: 'user_agent_type',
                type: 'options',
                description: 'Device type and browser.',
                options: toOptions([
                    ['Desktop', 'desktop'],
                    ['Desktop Chrome', 'desktop_chrome'],
                    ['Desktop Edge', 'desktop_edge'],
                    ['Desktop Firefox', 'desktop_firefox'],
                    ['Desktop Opera', 'desktop_opera'],
                    ['Desktop Safari', 'desktop_safari'],
                    ['Mobile', 'mobile'],
                    ['Mobile Android', 'mobile_android'],
                    ['Mobile iOS', 'mobile_ios'],
                    ['Tablet', 'tablet'],
                    ['Tablet Android', 'tablet_android'],
                    ['Tablet iOS', 'tablet_ios']
                ]),
                optional: true
            }
        ];
        this.outputs = [
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: [...this.baseClasses, 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ];
    }

    /**
     * Loads documents through `OxylabsLoader` using the node's configured inputs
     * and credential, optionally splits them, and returns the selected output.
     *
     * @param {object} nodeData - Node state; reads `inputs`, `outputs.output` and `credential`.
     * @param {*} _ - Unused.
     * @param {object} options - Passed through to `getCredentialData`.
     * @returns {Promise<Array<object>|string>} The documents when the selected
     *   output is `'document'`; otherwise every `pageContent` followed by a
     *   newline, concatenated and passed through `handleEscapeCharacters`.
     */
    async init(nodeData, _, options) {
        const inputs = nodeData.inputs;
        const textSplitter = inputs?.textSplitter;
        const credentialData = await (0, utils_1.getCredentialData)(nodeData.credential ?? '', options);
        const loader = new OxylabsLoader({
            username: (0, utils_1.getCredentialParam)('username', credentialData, nodeData),
            password: (0, utils_1.getCredentialParam)('password', credentialData, nodeData),
            query: inputs?.query,
            source: inputs?.source,
            geo_location: inputs?.geo_location,
            render: inputs?.render,
            parse: inputs?.parse,
            user_agent_type: inputs?.user_agent_type
        });
        let docs = await loader.load();
        // Splitting is skipped when no splitter is connected or nothing was loaded.
        if (textSplitter && docs.length > 0) {
            docs = await textSplitter.splitDocuments(docs);
        }
        if (nodeData.outputs?.output === 'document') {
            return docs;
        }
        // Any other output selection yields the text form: one line per document.
        const text = docs.map((doc) => `${doc.pageContent}\n`).join('');
        return (0, utils_1.handleEscapeCharacters)(text, false);
    }
}
// Publish the node class under the `nodeClass` key.
// NOTE(review): this reassigns `module.exports`, so properties set earlier on
// `exports` (such as `OxylabsLoader`) are not on the object consumers receive — confirm this is intended.
module.exports = { nodeClass: Oxylabs_DocumentLoaders };
//# sourceMappingURL=Oxylabs.js.map