@prohetamine/proxy-checker
Version:
Proxy parser & proxy checker
353 lines (302 loc) • 10.8 kB
JavaScript
const request = require('request-promise')
, moment = require('moment')
, delay = require('sleep-promise')
, puppeteer = require('puppeteer')
, fs = require('fs')
, { spawn } = require('child_process')
, createHash = require('./lib/create-hash')
// Shared module state.
// proxys: current pool of proxy address strings; replaced by load() and
// pruned by checkerInterval() through quarantine().
let proxys = []
// checkedProxys: checker key -> array of proxies recorded by that checker.
, checkedProxys = {}
// timeIds: stream id -> boolean flag; kill() sets it to false to stop the stream.
, timeIds = {}
/**
 * Narrows a proxy list to the entries that use the requested port.
 *
 * @param {string[]} proxys - Proxy addresses in "host:port" form.
 * @param {{ port?: (number|string|false|null) }} [options] - `port` is the
 *   port to keep; `false`, `null`, `undefined` or a missing options object
 *   disables filtering.
 * @returns {string[]} A new array with the matching entries, or the input
 *   list itself when no port filter is requested.
 */
const filter = (proxys, { port = false } = {}) => {
  // A missing or null port means "no filter" rather than a crash on toString().
  if (port === false || port === null) {
    return proxys
  }

  const wantedPort = String(port)
  return proxys.filter(proxy => proxy.split(':')[1] === wantedPort)
}
/**
 * Builds a copy of the pool with every occurrence of one proxy removed.
 *
 * @param {string[]} proxys - Current pool (left untouched).
 * @param {string} proxy - Address to drop from the pool.
 * @returns {string[]} New array without `proxy`.
 */
const quarantine = (proxys, proxy) => {
  const isOtherProxy = candidate => candidate !== proxy
  return proxys.filter(isOtherProxy)
}
/**
 * Downloads proxy lists from several public sources, de-duplicates them and,
 * when at least one address was collected, replaces the module pool with the
 * result (optionally narrowed to one port).
 *
 * Each source is best-effort: a failing source is skipped (and reported on
 * the console when `debug` is on) without aborting the others.
 *
 * @param {{ port?: (number|string|false), debug?: boolean }} [options]
 *   `port` keeps only proxies on that port; `debug` logs source failures.
 * @returns {Promise<number>} Size of the pool after the refresh. When no
 *   source returned anything the previous pool is kept and its size returned.
 */
const load = async ({ port = false, debug = false } = { port: false, debug: false }) => {
  const today = moment().format('YYYY-MM-DD')
  // Real calendar arithmetic: "day - 1" by hand produced an unpadded day and
  // day 0 on the first of a month, so the archive URL was wrong.
  const yesterday = moment().subtract(1, 'days').format('YYYY-MM-DD')

  const matchAddresses = text => text.match(/\d+\.\d+\.\d+\.\d+:\d+/gi)

  const readCheckerproxy = async day =>
    JSON.parse(await request(`https://checkerproxy.net/api/archive/${day}`)).map(({ addr }) => addr)

  // Sources are queried in this order; it determines the order of the pool.
  const sources = [
    {
      label: 'checkerproxy',
      read: () => readCheckerproxy(today)
    },
    {
      label: 'top-proxies',
      read: async () =>
        (await request(`https://top-proxies.ru/free_proxy/fre_proxy_api.php`)).match(/.+/gi)
    },
    {
      label: 'free-proxy-list',
      read: async () => matchAddresses(await request(`https://free-proxy-list.net`))
    },
    {
      label: 'proxy-list',
      read: async () => {
        const body = await request(`https://www.proxy-list.download/api/v0/get?l=en&t=http`)
        // Cut the "LISTA" array out of the response before parsing it.
        const listJson = body.match(/"LISTA": .+/gi)[0].replace(/("LISTA": |}]$)/gi, '')
        return JSON.parse(listJson).map(({ IP, PORT }) => IP + ':' + PORT)
      }
    },
    {
      label: 'checkerproxy',
      read: () => readCheckerproxy(yesterday)
    },
    {
      label: 'openproxy',
      read: async () => {
        const lists = JSON.parse(await request(`https://api.openproxy.space/list?skip=0&ts=${Date.now() - 1}`))
        const codes = lists
          .filter(({ title }) => title === 'FRESH HTTP/S')
          .map(({ code }) => code)

        const found = []
        for (const code of codes) {
          // One broken list must not discard the lists already fetched.
          try {
            const addresses = matchAddresses(await request(`https://openproxy.space/list/${code}`))
            for (const address of addresses) {
              found.push(address)
            }
          } catch (e) {
            debug && console.log('openproxy load error')
          }
        }
        return found
      }
    }
  ]

  // A Set keeps first-seen order while dropping duplicates.
  const collected = new Set()
  for (const { label, read } of sources) {
    try {
      for (const address of await read()) {
        // Skip malformed entries instead of storing "undefined" as a proxy.
        if (typeof address === 'string' && address.length > 0) {
          collected.add(address)
        }
      }
    } catch (e) {
      debug && console.log(label + ' load error')
    }
  }

  // Keep the previous pool when every source failed or returned nothing.
  if (collected.size > 0) {
    proxys = filter([...collected], { port })
  }

  return proxys.length
}
/**
 * Refreshes the proxy pool on a fixed interval and reports the pool size
 * after every refresh.
 *
 * @param {function(number): void} [callback] - Called with the pool size
 *   after each refresh attempt.
 * @param {number} [ms=5000] - Interval between refreshes, in milliseconds.
 * @param {{ started?: boolean, port?: (number|string|false), debug?: boolean }} [option]
 *   `started` triggers one refresh immediately; `port` and `debug` are
 *   forwarded to `load`.
 * @returns {Promise<function(): void>} Resolves to a function that stops the
 *   interval.
 */
const loadInterval = async (
  callback = () => {},
  ms = 5000,
  option = {
    started: false,
    port: false,
    debug: false
  }
) => {
  const refresh = async () => {
    // Options are read on every tick, not captured once.
    const { port, debug } = option

    try {
      await load({ port, debug })
    } catch (e) {
      // Best-effort refresh: the callback below still reports the current pool.
    }

    callback(proxys.length)
  }

  if (option.started) {
    refresh()
  }

  const timer = setInterval(refresh, ms)

  return () => clearInterval(timer)
}
/**
 * Builds the default browser launch options for one proxy: headless,
 * HTTPS errors ignored, traffic routed through the proxy.
 *
 * @param {string} proxy - Proxy address placed in the `--proxy-server` flag.
 * @returns {{ headless: boolean, ignoreHTTPSErrors: boolean, args: string[] }}
 */
const defaultBrowserConfig = proxy => {
  const proxyFlag = `--proxy-server=${proxy}`

  return {
    headless: true,
    ignoreHTTPSErrors: true,
    args: [proxyFlag]
  }
}
/**
 * Starts `stream` concurrent workers. Each worker repeatedly picks a proxy
 * from the module pool that this checker has not recorded yet, loads `url`
 * through it and records the proxy when one of the `indicators` matches.
 *
 * The returned promise resolves only after the pool is non-empty (it is
 * polled every 500 ms) and all workers have been scheduled. Workers start
 * staggered by 2000 ms per stream index.
 *
 * @param {?string} [key=null] - Name under which checked proxies are stored;
 *   a hash is generated when null.
 * @param {Object} [options]
 * @param {?string} [options.url=null] - Page to load; defaults to
 *   'https://yandex.ru'.
 * @param {boolean} [options.isBrowser=false] - Check with a puppeteer browser
 *   instead of the `lib/request.js` child process.
 * @param {boolean} [options.trashIgnore=false] - Browser mode: abort media,
 *   xhr, fetch, websocket, manifest, image, stylesheet, font and script
 *   requests.
 * @param {(function(string): Object|Object|false)} [options.browserConfig=false]
 *   Browser mode: a function mapping a proxy to launch options, or a plain
 *   options object merged over the defaults (its `args` are appended after
 *   the `--proxy-server` flag).
 * @param {(function(string): void|false)} [options.onData=false] - Called for
 *   every matching proxy; receives the page HTML in browser mode.
 * @param {number} [options.timeout=10000] - Per-check time limit in ms.
 * @param {number} [options.stream=2] - Number of concurrent workers; a
 *   non-integer or negative value falls back to 2.
 * @param {boolean} [options.debug=false] - Log progress to the console.
 * @param {Array<{selector?: string, keyword?: string}>} [options.indicators=[]]
 *   `selector` is used in browser mode, `keyword` in request mode.
 * @param {(string|false)} [options.session=false] - Path of a JSON file used
 *   to persist the matching proxies between runs.
 * @param {boolean} [options.quarantineMode=true] - Remove failing proxies
 *   from the pool.
 *   NOTE(review): when the whole options object is omitted the default is
 *   `false`, while `{}` yields `true`; kept as is, confirm which is intended.
 * @returns {Promise<{key: string, kill: function(): void, save: function(): void, clean: function(): void}>}
 *   `kill` stops the workers and persists the session, `save` persists the
 *   session, `clean` forgets all checked proxies of this checker.
 */
const checkerInterval = async (
  key = null,
  {
    url = null,
    isBrowser = false,
    trashIgnore = false,
    browserConfig = false,
    onData = false,
    timeout = 10000,
    stream = 2,
    debug = false,
    indicators = [],
    session = false,
    quarantineMode = true
  } = {
    url: null,
    isBrowser: false,
    trashIgnore: false,
    browserConfig: false,
    timeout: 10000,
    stream: 2,
    onData: false,
    debug: false,
    indicators: [],
    session: false,
    quarantineMode: false
  }
) => {
  const _key = key === null ? createHash() : key
  const _url = url === null ? 'https://yandex.ru' : url
  const _isBrowser = typeof isBrowser === 'boolean' ? isBrowser : false
  const _trashIgnore = typeof trashIgnore === 'boolean' ? trashIgnore : false
  const _timeout = typeof timeout === 'number' ? timeout : 10000
  // Array(n) needs a non-negative integer; anything else falls back to 2.
  const _stream = Number.isInteger(stream) && stream >= 0 ? stream : 2
  const _debug = typeof debug === 'boolean' ? debug : false
  const _quarantineMode = typeof quarantineMode === 'boolean' ? quarantineMode : false
  const _indicators = Array.isArray(indicators) ? indicators : []
  // Only a real function is invoked; any other value becomes a no-op.
  const _onData = typeof onData === 'function' ? onData : () => {}

  // A function is used as-is. A plain object used to be *called* and threw on
  // every proxy, so it is now merged over the default launch options.
  let _browserConfig = defaultBrowserConfig
  if (typeof browserConfig === 'function') {
    _browserConfig = browserConfig
  } else if (browserConfig !== null && typeof browserConfig === 'object') {
    _browserConfig = proxy => {
      const base = defaultBrowserConfig(proxy)
      const extraArgs = Array.isArray(browserConfig.args) ? browserConfig.args : []
      return Object.assign({}, base, browserConfig, { args: base.args.concat(extraArgs) })
    }
  }

  // `false` means "no session"; an unreadable or malformed file starts empty.
  let _session = false
  if (session) {
    try {
      const stored = JSON.parse(fs.readFileSync(session, { encoding: 'utf8' }))
      _session = Array.isArray(stored) ? stored : []
    } catch (e) {
      _session = []
    }
  }

  checkedProxys[_key] = _session === false ? [] : [..._session]

  const TRASH_RESOURCES = ['media', 'xhr', 'fetch', 'websocket', 'manifest', 'image', 'stylesheet', 'font', 'script']

  const log = (id, i_id, message) => {
    _debug && console.log('Stream: ' + id + ' [' + i_id + '] ' + message)
  }

  // Writes the session file only when a session path was configured.
  const persist = list => {
    if (_session !== false) {
      fs.writeFileSync(session, JSON.stringify(list))
    }
  }

  // browser.close() is asynchronous: swallow both sync and async failures so
  // a double close cannot surface as an unhandled rejection.
  const closeQuietly = browser => {
    try {
      const closing = browser.close()
      if (closing && typeof closing.catch === 'function') {
        closing.catch(() => {})
      }
    } catch (e) {
      // Already closed or disconnected: nothing left to release.
    }
  }

  const dropFromPool = proxy => {
    if (_quarantineMode) {
      proxys = quarantine(proxys, proxy)
    }
  }

  const _timeIds = await Promise.all(
    Array(_stream).fill(1).map(async (_, index) => {
      const timeId = createHash()
      timeIds[timeId] = true

      const instance = async (id, i_id) => {
        if (timeIds[timeId] === false) {
          log(id, i_id, '[Kill instance]')
          return
        }

        // Pick only among proxies not yet recorded. When none is left, wait
        // and retry instead of spinning forever on random picks.
        const alreadyChecked = new Set(checkedProxys[_key])
        const candidates = proxys.filter(candidate => !alreadyChecked.has(candidate))
        if (candidates.length === 0) {
          setTimeout(instance, 500, id, i_id)
          return
        }
        const proxy = candidates[Math.floor(Math.random() * candidates.length)]

        if (_isBrowser) {
          try {
            const browser = await puppeteer.launch(_browserConfig(proxy))
            // Watchdog: closing the browser makes a hanging check fail.
            const watchdog = setTimeout(closeQuietly, _timeout, browser)

            try {
              const page = await browser.newPage()

              if (_trashIgnore) {
                await page.setRequestInterception(true)
                page.on('request', pageRequest => {
                  if (TRASH_RESOURCES.includes(pageRequest.resourceType())) {
                    pageRequest.abort()
                  } else {
                    pageRequest.continue()
                  }
                })
              }

              await page.goto(_url)

              // Await each selector in turn: Array#find with an async
              // callback always "matched" because a promise is truthy.
              let matched = false
              for (const indicator of _indicators) {
                try {
                  if (await page.$(indicator.selector)) {
                    matched = true
                    break
                  }
                } catch (e) {
                  // A selector that cannot be evaluated counts as no match.
                }
              }

              if (matched) {
                log(id, i_id, '[Load & parse] valid proxy: ' + proxy)
                checkedProxys[_key].push(proxy)
                _onData(await page.evaluate(`document.querySelector('*').outerHTML`))
                if (_session !== false) {
                  _session.push(proxy)
                }
                instance(id, i_id + 1)
                return
              }

              log(id, i_id, '[Load] valid proxy: ' + proxy)
            } catch (e) {
              dropFromPool(proxy)
              log(id, i_id, '[Not load] invalid proxy: ' + proxy)
            } finally {
              // Runs on every path, including the early return above.
              clearTimeout(watchdog)
              closeQuietly(browser)
            }
          } catch (e) {
            dropFromPool(proxy)
            log(id, i_id, '[Browser error] invalid proxy: ' + proxy)
          }

          instance(id, i_id + 1)
        } else {
          try {
            // The first stdout chunk of the child is its verdict. A child
            // that fails to start or exits silently resolves as a timeout
            // so the stream cannot hang on a promise that never settles.
            const result = await new Promise(res => {
              const _request = spawn('node', [
                __dirname + '/lib/request.js',
                _url,
                'http://' + proxy,
                String(_timeout),
                JSON.stringify(_indicators.map(({ keyword }) => keyword))
              ])
              _request.stdout.on('data', data => res(`${data}`.trim()))
              _request.on('error', () => res('timeout'))
              _request.on('close', () => res('timeout'))
            })

            if (result === 'timeout') {
              throw new Error('proxy check timed out')
            }

            if (result === 'true') {
              log(id, i_id, '[Load & parse] valid proxy: ' + proxy)
              checkedProxys[_key].push(proxy)
              _onData('/* todo */')
              if (_session !== false) {
                _session.push(proxy)
              }
              // Not returned: chaining every run would grow without bound.
              instance(id, i_id + 1)
              return
            }

            log(id, i_id, '[Load] valid proxy: ' + proxy)
          } catch (e) {
            dropFromPool(proxy)
            log(id, i_id, '[Not load] invalid proxy: ' + proxy)
          }

          instance(id, i_id + 1)
        }
      }

      // Wait for a non-empty pool, then start this stream with a stagger.
      for (;;) {
        if (proxys.length > 0) {
          setTimeout(instance, index * 2000, index, 0)
          break
        }
        await delay(500)
      }

      return timeId
    })
  )

  return {
    key: _key,
    kill: () => {
      _timeIds.forEach(timeId => (timeIds[timeId] = false))
      persist(_session)
    },
    save: () => persist(_session),
    clean: () => {
      checkedProxys[_key] = []
      if (_session !== false) {
        // Also empty the in-memory session, otherwise the next save()/kill()
        // would write the cleaned proxies back to the file.
        _session.length = 0
      }
      persist([])
    }
  }
}
/**
 * Lists the proxies currently in the pool, optionally limited to one port.
 *
 * @param {{ port?: (number|string|false) }} [options] - `port` to keep, or
 *   `false` for every proxy.
 * @returns {string[]} The matching proxies; an empty array when none match.
 */
const all = ({ port = false } = { port: false }) => {
  const matches = filter(proxys, { port })

  if (matches.length > 0) {
    return matches
  }

  return []
}
/**
 * Picks one proxy uniformly at random from the pool, optionally limited to
 * one port.
 *
 * @param {{ port?: (number|string|false) }} [options] - `port` to pick from,
 *   or `false` for the whole pool.
 * @returns {(string|false)} A proxy address, or `false` when none is available.
 */
const random = ({ port = false } = { port: false }) => {
  const array = all({ port })

  if (array.length === 0) {
    return false
  }

  // floor(random * length) covers every index, including the last one, and
  // avoids parseInt misreading exponent notation such as "5e-7" as 5.
  return array[Math.floor(Math.random() * array.length)]
}
/**
 * Gives access to the proxies recorded by one checker.
 *
 * @param {?string} [key=null] - Checker key, as returned by `checkerInterval`.
 * @returns {{ all: function(Object=): string[], random: function(Object=): (string|false) }}
 *   `all` lists the recorded proxies and `random` picks one of them (or
 *   `false` when there is none). Both accept `{ port }` to limit the result
 *   to one port, and both throw `Error('not key name')` when `key` is null.
 */
const get = (key = null) => {
  // Shared lookup: validates the key and tolerates a key that has no entry
  // yet, which used to fail with a TypeError on `undefined.length`.
  const checked = port => {
    if (key === null) {
      throw new Error('not key name')
    }
    return filter(checkedProxys[key] || [], { port })
  }

  return {
    all: ({ port = false } = { port: false }) => {
      const array = checked(port)
      return array.length > 0 ? array : []
    },
    random: ({ port = false } = { port: false }) => {
      const array = checked(port)
      if (array.length === 0) {
        return false
      }
      // floor(random * length) can reach the last element as well.
      return array[Math.floor(Math.random() * array.length)]
    }
  }
}
// Public API: pool loading (load, loadInterval), proxy checking
// (checkerInterval) and read access to the pool (all, random) and to the
// per-checker results (get).
module.exports = {
load,
loadInterval,
checkerInterval,
all,
random,
get
}