UNPKG

proxy-lists

Version:

Get proxies from publicly available proxy lists.

301 lines (264 loc) 7.26 kB
#!/usr/bin/env node

'use strict';

// To fix "MaxListenersExceededWarning: Possible EventEmitter memory leak detected" warning.
// Setting max listeners to a large yet reasonable number.
process.setMaxListeners(1024);

var _ = require('underscore');
var fs = require('fs');
var path = require('path');
var program = require('commander');
var pkg = require('./package.json');
var ProxyLists = require('./index');

// Output formats understood by the "getProxies" command.
var validOutputFormats = ['json', 'csv', 'txt'];

// Column order used for the CSV header and for every CSV row.
var proxyFieldNames = ['source', 'ipAddress', 'port', 'country', 'protocols', 'anonymityLevel'];

/**
 * Option parser: splits a comma-separated command-line value into an array.
 *
 * @param {string} value - Raw option value, e.g. "us,ca,cz".
 * @returns {string[]} The individual items.
 */
function list(value) {
  return value.split(',');
}

/**
 * Option parser: returns the command-line value unchanged.
 *
 * @param {string} value - Raw option value.
 * @returns {string} The same value.
 */
function value(value) {
  return value;
}

/**
 * Resolves a user-supplied file path against the current working directory.
 * Absolute paths are returned as-is (normalized), on every platform.
 *
 * @param {string} filePath - Relative or absolute path.
 * @returns {string} Absolute path.
 */
function resolveFromCwd(filePath) {
  return path.resolve(process.cwd(), filePath);
}

program
  .version(pkg.version)
  .description(pkg.description);

program
  .command('getProxies')
  .option(
    '-m, --filter-mode [value]',
    'Set the filter mode [strict or loose]',
    value,
    null
  )
  .option(
    '-a, --anonymity-levels <list>',
    'Get proxies with these anonymity levels [' + ProxyLists._anonymityLevels.join(', ') + ']',
    list,
    null
  )
  .option(
    '-c, --countries <list>',
    'Get proxies from these countries [us, ca, cz, ..]',
    list,
    null
  )
  .option(
    '-C, --countries-black-list <list>',
    'Exclude proxies from these countries [de, gb, ..]',
    list,
    null
  )
  .option(
    '-p, --protocols <list>',
    'Get proxies that support these protocols [' + ProxyLists._protocols.join(', ') + ']',
    list,
    null
  )
  .option(
    '-s, --sources-white-list <list>',
    'Get proxies from these sources only [some-source, another-source, somewhere, etc..]',
    list,
    null
  )
  .option(
    '-x, --sources-black-list <list>',
    'Do not get proxies from these sources [some-source, another-source, somewhere, etc..]',
    list,
    null
  )
  .option(
    '--sources-dir [value]',
    'Full path to the sources directory',
    value,
    ProxyLists.defaultOptions.sourcesDir
  )
  .option(
    '-f, --output-file [value]',
    'File to which the output will be written',
    value,
    'proxies'
  )
  .option(
    '-F, --output-format [value]',
    'Format in which the output will be written [' + validOutputFormats.join(', ') + ']',
    value,
    'txt'
  )
  .option(
    '--series',
    'Perform all asynchronous operations in series'
  )
  .option(
    '--sample',
    'Get a sample of proxies from each source'
  )
  .option(
    '--stdout',
    'Write to STDOUT instead of a file',
    value,
    false
  )
  .option(
    '-l, --log-file [value]',
    'File to which will be logged when writing to stdout',
    value,
    'proxy-lists.log'
  )
  .action(function(options) {

    var outputFormat = options.outputFormat;
    var stdout = options.stdout;

    // Anchor the output file to the working directory and give it an
    // extension matching the output format when it has none.
    var outputFile = resolveFromCwd(options.outputFile);
    if (!path.extname(outputFile)) {
      outputFile = outputFile + '.' + outputFormat;
    }

    var outputStream;
    if (!stdout) {
      outputStream = fs.createWriteStream(outputFile);
    } else {
      outputFile = 'STDOUT';
      // Minimal stand-in exposing the write/end/on methods used below, so
      // the rest of the code does not care where the output goes.
      outputStream = {
        write: function(data) {
          process.stdout.write(data + '\n');
        },
        end: function(cb) {
          cb();
        },
        on: function() {},
      };
    }

    var logStream = fs.createWriteStream(resolveFromCwd(options.logFile));

    // Writes all arguments, joined by single spaces, as one line to the log file.
    function log() {
      var args = Array.prototype.slice.call(arguments);
      var message = args.join(' ');
      logStream.write(message + '\n');
    }

    var numWriting = 0;
    var wroteData = false;

    // Serializes one batch of proxies in the selected format and writes it.
    function onData(data) {
      if (!_.isEmpty(data)) {
        numWriting++;
        switch (outputFormat) {
          case 'json':
            data = _.map(data, function(row) {
              return JSON.stringify(row);
            });
            // Batches after the first need a leading separator.
            outputStream.write((wroteData ? ',' : '') + data.join(','));
            break;
          case 'csv':
            data = _.map(data, function(row) {
              return _.map(proxyFieldNames, function(fieldName) {
                // Array-valued fields are flattened with "/".
                return _.isArray(row[fieldName]) ? row[fieldName].join('/') : row[fieldName];
              }).join(',');
            });
            // The header line was written without a trailing newline.
            outputStream.write('\n' + data.join('\n'));
            break;
          case 'txt':
            data = _.map(data, function(row) {
              return row.ipAddress + ':' + row.port;
            });
            outputStream.write((wroteData ? '\n' : '') + data.join('\n'));
            break;
        }
        numWriting--;
        wroteData = true;
      }
      tryEndOutput();
    }

    function tryEndOutput() {
      if (canEndOutput()) {
        endOutput();
      }
    }

    function canEndOutput() {
      return doneScrapingAllSources() && !isWriting();
    }

    function isWriting() {
      return numWriting > 0;
    }

    function doneScrapingAllSources() {
      return !!_.every(sources, function(source) {
        return !!sourcesDone[source.name];
      });
    }

    // Writes the format-specific preamble; guarded so it runs only once.
    var startOutput = _.once(function() {
      log('Writing output to ' + outputFile);
      switch (outputFormat) {
        case 'json':
          outputStream.write('[');
          break;
        case 'csv':
          outputStream.write(proxyFieldNames.join(','));
          break;
      }
    });

    // Writes the format-specific epilogue, then closes the output stream and
    // the log stream in turn before exiting; guarded so it runs only once.
    var endOutput = _.once(function() {
      log('Closing output stream...');
      switch (outputFormat) {
        case 'json':
          outputStream.write(']');
          break;
      }
      outputStream.end(function() {
        log('Output stream closed');
        log('Closing log stream...');
        if (logStream) {
          logStream.end(done);
        } else {
          done();
        }
      });
    });

    var done = _.once(function() {
      process.exit();
    });

    log('Getting proxies...');

    var listSourcesOptions = _.pick(options, [
      'filterMode',
      'anonymityLevels',
      'countries',
      'countriesBlackList',
      'protocols',
      'sourcesWhiteList',
      'sourcesBlackList',
      'sourcesDir',
      'sample',
      'series',
    ]);

    var sources = ProxyLists.listSources(listSourcesOptions);
    var sourceOptions = _.omit(listSourcesOptions, 'sourcesWhiteList', 'sourcesBlackList');
    var sourcesDone = {};

    _.each(sources, function(source) {
      try {
        ProxyLists.getProxiesFromSource(source.name, sourceOptions)
          .on('data', onData)
          .on('error', function(error) {
            log('Error while scraping', source.name + ':', error);
          })
          .once('end', function() {
            log('Finished scraping from', source.name);
            sourcesDone[source.name] = true;
            tryEndOutput();
          });
      } catch (error) {
        log(error);
        // A source that failed to start will never emit "end"; mark it done
        // so it cannot keep the output from being finalized.
        sourcesDone[source.name] = true;
      }
    });

    startOutput();

    // Covers the cases where no "end" event will ever trigger finalization:
    // an empty source list, or every source having thrown on start.
    tryEndOutput();
  });

program
  .command('updateGeoIpData')
  .usage([
    '--license-key <value>',
    '',
    'This module uses geoip-lite to perform geoip-country lookups on IP addresses for',
    'each proxy. The geoip-lite module ships with the free version of MaxMind\'s geoip',
    'database. This database stopped being directly included in the module due to',
    'a change on MaxMind\'s side - specifically with their end-user licensing agreements.',
    'So it is necessary for each end-user (that\'s you!) to create their own MaxMind',
    'account and then generate a license key.',
    '',
    'To sign-up for a MaxMind account:',
    'https://www.maxmind.com/en/geolite2/signup',
    '',
    'To generate a new license key:',
    'https://support.maxmind.com/account-faq/license-keys/how-do-i-generate-a-license-key/',
  ].join('\n'))
  .requiredOption(
    '-l, --license-key <value>',
    'Your MaxMind license key'
  )
  .action(function(options) {
    // NOTE(review): the update script presumably reads the key from
    // process.argv, since it is pushed there before the script is loaded.
    process.argv.push('license_key=' + options.licenseKey);
    var updateScript;
    try {
      // Use Node's module resolution so the script is found even when
      // geoip-lite is hoisted out of this package's own node_modules.
      updateScript = require.resolve('geoip-lite/scripts/updatedb.js');
    } catch (error) {
      // Fall back to the nested location; the require below still throws
      // if the script is missing there as well.
      updateScript = path.join(__dirname, 'node_modules', 'geoip-lite', 'scripts', 'updatedb.js');
    }
    require(updateScript);
  });

program.parse(process.argv);