@luminati-io/luminati-proxy
Version:
A configurable local proxy for brightdata.com
775 lines (714 loc) • 23.4 kB
JavaScript
// LICENSE_CODE ZON ISC
; /*jslint node:true, esnext:true, es9: true*/
const http = require('http');
const https = require('https');
const tls = require('tls');
const zlib = require('zlib');
const os = require('os');
const stream = require('stream');
const fs = require('fs');
const {exec} = require('child_process');
const check_invalid_header = require('_http_common')._checkInvalidHeaderChar;
const redos_detector = require('redos-detector');
const semver = require('semver');
const cookie = require('cookie');
const request = require('../util/lpm_request.js').defaults({gzip: true});
const conv = require('../util/conv.js');
const date = require('../util/date.js');
const file = require('../util/file.js');
const etask = require('../util/etask.js');
const zerr = require('../util/zerr.js');
const zutil = require('../util/util.js');
const lpm_config = require('../util/lpm_config.js');
const pkg = require('../package.json');
const swagger = require('./swagger.json');
const {SSL_OP_NO_TLSv1_1, SOURCES} = require('./consts.js');
// platform flags, computed once when the module is loaded
const is_win = process.platform=='win32';
const is_darwin = process.platform=='darwin';
// dotted-quad shape check only: the 0-255 range of each octet is not checked
const ip_re = /^\d+\.\d+\.\d+\.\d+$/;
// one word character followed by exactly 32 hex digits (case-insensitive)
const eip_re = /^\w[0-9a-f]{32}$/i;
// optional http(s) scheme, a dotted-quad host, then ONE of the characters
// '$', '/', ':' or '?'
// NOTE(review): inside a character class '$' is a literal dollar sign, not
// end-of-input, so a url with nothing after the address (for example
// "http://1.2.3.4") does not match - confirm this is intended
const ip_url_re = /^(https?:\/\/)?(\d+\.\d+\.\d+\.\d+)([$/:?])/i;
// E is the module's export object; the helpers below are attached to it
const E = module.exports = {};
const {assign, keys, entries} = Object;
// filled on first use by E.get_lpm_tasks with the default export of 'ps-list'
let ps_list;
// user-agent string built from the package version
E.user_agent = `luminati-proxy-manager/${pkg.version}`;
// function that does nothing and returns undefined
E.noop = ()=>undefined;
// a request carrying a referer header is attributed to the UI, any other
// request to the API
E.get_source = req=>{
    if (req.headers.referer)
        return SOURCES.UI;
    return SOURCES.API;
};
// 'username' entry of the request's cookie header (undefined when absent)
E.get_username = req=>{
    const parsed = cookie.parse(req.headers.cookie||'');
    return parsed.username;
};
// stringify json through conv.JSON_stringify with the given indentation
E.format_json = (json, spaces=2)=>{
    const opt = {spaces};
    return conv.JSON_stringify(json, opt);
};
// scaled byte count with a 'B' suffix, or null for a falsy amount
E.convert_bytes = bytes=>{
    if (!bytes)
        return null;
    return conv.scaled_bytes(bytes)+'B';
};
// turn a headers object into an array of {name, value} pairs
E.headers_to_a = h=>entries(h).map(([name, value])=>({name, value}));
// Resolve a range parameter to a single number.
// range: an array, or a value whose string form is 'from' or 'from:to';
//   entries that are not numeric count as 0
// mult: factor applied to both bounds
// Returns the lower bound when there is only one bound or when the upper
// bound is not greater than it, otherwise E.rand_range(from, to).
E.param_rand_range = (range=0, mult=1)=>{
    const parts = Array.isArray(range) ? range : (''+range).split(':');
    const [from, to] = parts.map(p=>(+p||0)*mult);
    if (parts.length<2 || to<=from)
        return from;
    return E.rand_range(from, to);
};
// Parse the 'x-brd-debug' header found on sock.headers into sock.brd_debug.
// The header is a list of 'key=value' pairs separated by '; '. A value that
// is entirely numeric is stored as a number, anything else as the raw string
// (a pair without '=' is stored with value undefined).
// The result is cached: if sock.brd_debug already exists, or the header is
// missing, the existing object (or a new empty one) is kept.
// Returns sock.brd_debug in every case.
E.parce_brd_debug = sock=>{
    const {headers={}} = sock;
    const raw = headers['x-brd-debug'];
    if (!raw || sock.brd_debug)
        return sock.brd_debug = sock.brd_debug||{};
    const parsed = {};
    for (const pair of raw.split('; '))
    {
        // split on the first '=' only, so values containing '=' stay whole
        const eq = pair.indexOf('=');
        const key = eq<0 ? pair : pair.slice(0, eq);
        const val = eq<0 ? undefined : pair.slice(eq+1);
        const num_val = parseFloat(val);
        // isFinite coerces the whole string, which rejects values such as
        // '12abc' that parseFloat would accept partially
        parsed[key] = !isNaN(num_val) && isFinite(val) ? num_val : val;
    }
    return sock.brd_debug = parsed;
};
// random integer between start and end, produced by rounding a uniformly
// distributed value taken from that interval
E.rand_range = (start=0, end=1)=>{
    const span = end-start;
    return Math.round(start+Math.random()*span);
};
// Strip, in place, every header value that check_invalid_header flags.
// For array-valued headers only the offending entries are dropped; a header
// left without any value is deleted altogether.
const remove_invalid_headers = headers=>{
    for (const name in headers)
    {
        const value = headers[name];
        if (!Array.isArray(value))
        {
            if (check_invalid_header(value))
                delete headers[name];
            continue;
        }
        const valid = value.filter(v=>!check_invalid_header(v));
        if (valid.length)
            headers[name] = valid;
        else
            delete headers[name];
    }
};
// Build an SNI callback from a map of domain -> certificate path prefix.
// For each domain '<prefix>.key' and '<prefix>.crt' are read once, up front,
// and turned into a secure context.
// The returned (servername, cb) function answers with the context of the
// exact servername, falling back to the name without its first label (so a
// context registered for the parent domain also serves its subdomains);
// cb receives undefined when neither is known.
E.sni_callback_fn = certs=>{
    const ctx_by_domain = {};
    for (const domain in certs)
    {
        const prefix = certs[domain];
        ctx_by_domain[domain] = tls.createSecureContext({
            key: file.read_e(prefix+'.key', null),
            cert: file.read_e(prefix+'.crt', null),
            secureOptions: SSL_OP_NO_TLSv1_1,
        }).context;
    }
    return (servername, cb)=>{
        let ctx = ctx_by_domain[servername];
        if (!ctx)
            ctx = ctx_by_domain[servername.split('.').slice(1).join('.')];
        cb(null, ctx);
    };
};
// Write the response head (status line + headers) of proxy_res to client_res.
// client_res: an http.ServerResponse, or any other writable exposing
//   write()/end(), in which case the head is serialized by hand as HTTP/1.1
// proxy_res: source of statusCode, statusMessage and headers
// headers: extra headers; proxy_res.headers are merged INTO this object, so
//   the caller's object is modified and proxy_res values win on conflict
// opt: reads zagent, reseller, debug ('full'|'none') and lpm_auth ('full')
// end: when true the response is ended once the head has been written
// Sets client_res.resp_written. Write errors are rethrown; on the raw path
// the serialized head is appended to the error message first.
E.write_http_reply = (client_res, proxy_res, headers={}, opt={}, end=false)=>{
    headers = Object.assign(headers, proxy_res.headers||{});
    const hide_sensitive_info = opt.zagent && opt.reseller &&
        !['ip', 'token', 'lpm_token'].includes(client_res.lpm_auth_type);
    if (client_res.x_hola_context)
        headers['x-hola-context'] = client_res.x_hola_context;
    if (opt.debug=='full')
    {
        if (client_res.port)
            headers['x-lpm-port'] = client_res.port;
    }
    if (opt.debug=='none' || hide_sensitive_info)
    {
        for (let k in headers)
        {
            if (['x-luminati', 'x-hola', 'x-lpm'].some(h=>k.includes(h)))
                delete headers[k];
        }
        delete headers['x-debug-bw-up'];
        delete headers['x-debug-bw-dn'];
        delete headers['x-brd-debug'];
    }
    if (opt.lpm_auth=='full' && client_res.cred && !hide_sensitive_info)
        headers['x-lpm-authorization'] = client_res.cred;
    client_res.resp_written = true;
    if (client_res instanceof http.ServerResponse)
    {
        try {
            client_res.writeHead(proxy_res.statusCode,
                proxy_res.statusMessage, headers);
        } catch(e){
            if (e.code!='ERR_INVALID_CHAR')
                throw e;
            // retry once without the header values node refused
            remove_invalid_headers(headers);
            client_res.writeHead(proxy_res.statusCode,
                proxy_res.statusMessage, headers);
        }
        if (end)
            client_res.end();
        return;
    }
    if (client_res.writableEnded)
        return;
    let head = `HTTP/1.1 ${proxy_res.statusCode} ${proxy_res.statusMessage}`
        +`\r\n`;
    for (let field in headers)
    {
        // a repeated header (e.g. set-cookie) is held as an array: emit one
        // line per value, since a comma-joined line would corrupt cookies
        const values = Array.isArray(headers[field]) ? headers[field]
            : [headers[field]];
        for (const value of values)
            head += `${field}: ${value}\r\n`;
    }
    try {
        client_res.write(head+'\r\n', ()=>{
            if (end)
                client_res.end();
        });
    } catch(e){
        e.message = (e.message||'')+`\n${head}`;
        throw e;
    }
};
// Start a request with node's own http/https client.
// The target is opt.url (or opt.uri), given as a string or a URL object; the
// https module is used when its protocol starts with 'https:', http
// otherwise. opt is passed through as the request options.
// Returns the request object created by the chosen module.
E.native_request = opt=>{
    const target = opt.url||opt.uri;
    const parsed = target instanceof URL ? target : new URL(target);
    const is_https = parsed.protocol.startsWith('https:');
    return (is_https ? https : http).request(target, opt);
};
// true when domain has the dotted-quad shape accepted by ip_re
E.is_ip = domain=>Boolean(ip_re.test(domain));
// true when ip matches eip_re
E.is_eip = ip=>Boolean(eip_re.test(ip));
// true when url is recognized by E.parse_ip_url
E.is_ip_url = url=>Boolean(E.parse_ip_url(url));
// Split a url whose host is a dotted-quad address, as matched by ip_url_re.
// Returns null when the url does not match, otherwise
// {url, protocol, ip, suffix}: the matched prefix, the scheme (or ''), the
// address and the single character that follows it.
E.parse_ip_url = url=>{
    const m = url.match(ip_url_re);
    if (!m)
        return null;
    const [full, protocol, ip, suffix] = m;
    return {
        url: full||'',
        protocol: protocol||'',
        ip: ip||'',
        suffix: suffix||'',
    };
};
// true for the two spellings that stand for "every address"
E.is_any_ip = ip=>['any', '0.0.0.0/0'].some(v=>ip==v);
// true when the request arrived over a TLS socket
E.req_is_ssl = ({socket})=>socket instanceof tls.TLSSocket;
// true for an HTTP CONNECT request
E.req_is_connect = ({method})=>method=='CONNECT';
// Full url of a request.
// A request that is not ssl (per E.req_is_ssl) keeps req.url as is. For an
// ssl request the leading '/' of the url, together with any scheme and host
// in front of it, is replaced by the Host header and 'https://' is prepended.
E.req_full_url = req=>{
    if (E.req_is_ssl(req))
    {
        const host_prefix = req.headers.host+'/';
        const rest = req.url.replace(/^(https?:\/\/[^/]+)?\//, host_prefix);
        return `https://${rest}`;
    }
    return req.url;
};
// Build or update a request id of the form '<prefix>-<index>-<random>'.
// id: existing id, or a falsy value to generate a new one
// ind: index to stamp into the id
// prefix: prefix used for a newly generated id
// An existing id gets its index segment replaced when ind is non-zero and is
// returned unchanged otherwise.
E.gen_id = (id, ind=0, prefix='r')=>{
    if (!id)
        return `${prefix}-${ind}-${E.rand_range(1, 1000000)}`;
    // keep both dashes around the index: without the trailing one the index
    // and the random part would run together ('r-0-123' -> 'r-2123')
    if (ind)
        id = id.replace(/-[0-9]*-/, `-${ind}-`);
    return id;
};
// Wrap fn so that a synchronous exception is logged and handed to
// sp.throw() instead of propagating to the caller.
// The wrapper forwards its arguments and returns fn's result, or undefined
// when fn threw.
E.wrp_sp_err = (sp, fn)=>(...args)=>{
    let res;
    try {
        res = fn.call(null, ...args);
    } catch(e){
        console.error('wrap sp err', e);
        sp.throw(e);
    }
    return res;
};
// Parse a raw HTTP response given as a string.
// Returns {head, body, headers, rawHeaders, status_code, status_message}:
// - head/body are split at the FIRST blank line, so a body that itself
//   contains blank lines is kept whole
// - headers and rawHeaders hold the same data: lower-cased header name ->
//   value, where the value is everything after the first ':' of the line
//   (surrounding whitespace is not trimmed)
// - status_code is the three digit code as a string, or the number 0 when
//   the first line has no 'NNN reason' part
// A missing or empty input yields the empty result.
E.parse_http_res = res=>{
    const parsed = {
        head: '',
        body: '',
        headers: {},
        rawHeaders: {},
        status_code: 0,
        status_message: '',
    };
    res = res||'';
    const sep = res.indexOf('\r\n\r\n');
    parsed.head = sep<0 ? res : res.slice(0, sep);
    parsed.body = sep<0 ? '' : res.slice(sep+4);
    const lines = parsed.head.split('\r\n');
    for (const line of lines.slice(1))
    {
        // lazy name group: split on the first colon, so values that contain
        // ':' themselves (urls, host:port) are not cut in the wrong place
        const m = line.match(/(.*?):(.*)/);
        if (!m)
            continue;
        const name = m[1].toLowerCase();
        parsed.headers[name] = m[2]||'';
        parsed.rawHeaders[name] = m[2]||'';
    }
    const status = lines[0].match(/(\d\d\d) (.*)/);
    if (status)
    {
        parsed.status_code = status[1];
        parsed.status_message = status[2]||'';
    }
    return parsed;
};
// Decode a response body collected as an array of buffers into a string.
// body: array of buffers; any other value is returned untouched
// encoding: 'gzip', 'br' or 'deflate'; anything else means "not compressed"
// limit: -1 disables decoding (returns ''), a positive value truncates the
//   decoded text to that many characters
// body_size: for 'br' only - when it exceeds limit nothing is decoded and ''
//   is returned
// Deflate input is tried with a zlib wrapper first and as a raw stream second.
// The decoded text is trimmed. Throws an Error naming the encoding when
// decompression fails.
E.decode_body = (body, encoding, limit, body_size)=>{
    if (limit==-1 || body=='')
        return '';
    if (!Array.isArray(body))
        return body;
    const raw = Buffer.concat(body);
    let decoded;
    try {
        if (encoding==='gzip')
            decoded = zlib.gunzipSync(raw, {finishFlush: zlib.Z_SYNC_FLUSH});
        else if (encoding==='br')
        {
            if (body_size && limit && body_size>limit)
                return '';
            decoded = zlib.brotliDecompressSync(raw);
        }
        else if (encoding==='deflate')
        {
            try {
                decoded = zlib.inflateSync(raw);
            } catch(e){
                decoded = zlib.inflateRawSync(raw);
            }
        }
        else
            decoded = raw;
    } catch(e){
        throw new Error(`decoding body failed with encoding ${encoding}: `
            +e.message);
    }
    const text = decoded.toString('utf8').trim();
    return limit ? text.slice(0, limit) : text;
};
// Extract the host part of a url: scheme, credentials, a leading 'www.',
// port, path and query are all left out. Returns '' when nothing matches.
E.url2domain = url=>{
    const domain_re =
        /^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:/\n?]+)/img;
    const m = domain_re.exec(url);
    if (!m)
        return '';
    return m[1]||'';
};
// Perform a request expecting a JSON answer.
// opt: a url string, or a request options object (must carry 'url')
// The options are copied before 'json: true' and, for urls containing
// pkg.api_domain, the default user-agent header are added, so the object
// passed by the caller is never modified.
// On failure the options and the error are logged and the error is rethrown.
E.json = etask._fn(function*util_json(_this, opt){
    try {
        opt = typeof opt=='string' ? {url: opt} : Object.assign({}, opt);
        opt.json = true;
        if (opt.url.includes(pkg.api_domain))
        {
            // headers given by the caller win over the default user-agent
            opt.headers = Object.assign({'user-agent': E.user_agent},
                opt.headers||{});
        }
        return yield etask.nfn_apply(request, [opt]);
    } catch(e){
        zerr.info('json opt: %s', JSON.stringify(opt));
        zerr.warn('Fetch json error: %s', zerr.e2s(e));
        etask.ef(e);
        throw e;
    }
});
// Number of entries in /proc/self/fd, i.e. the file descriptors held by this
// process. Yields 0 on windows and mac, and when the directory cannot be
// read. An alarm of 1000 is armed on the task before reading.
E.count_fd = ()=>etask(function*mgr_count_fd(){
    const unsupported = is_win || is_darwin;
    if (unsupported)
        return 0;
    this.alarm(1000);
    let fds;
    try {
        fds = yield etask.nfn_apply(fs, '.readdir', ['/proc/self/fd']);
    } catch(e){
        return 0;
    }
    return fds.length;
});
// Count the netstat lines that mention ':<port>' and the state <key>.
// Yields 0 on windows and mac. An alarm of 1000 is armed on the task.
// NOTE: key and port are interpolated into a shell pipeline without any
// escaping - pass trusted values only.
E.count_tcp = (key='ESTABLISHED', port='33335')=>etask(function*_count_tcp(){
    if (is_win || is_darwin)
        return 0;
    this.alarm(1000);
    const cmd = `netstat -tnp | grep ":${port}" | grep ${key} | wc -l`;
    exec(cmd, (err, stdout)=>{
        if (err)
            return this.throw(err);
        try {
            this.continue(Number(stdout));
        } catch(e){
            // forward the error caught here: 'err' is always empty at this
            // point and would hide the real failure
            this.throw(e);
        }
    });
    return yield this.wait();
});
// End a socket and make sure it eventually gets destroyed.
// sock: a socket, or an http.ClientRequest/http.ServerResponse whose socket
//   is used instead
// close: end the socket even when it is not writable
// e: optional error handed to destroy()
// The socket is destroyed right away when its writable side has finished,
// otherwise 10 seconds later (the timer is armed by the first call only).
// Every call increments sock.destroyed_soon.
E.ensure_socket_close = (sock, close, e)=>{
    if (sock instanceof http.ClientRequest ||
        sock instanceof http.ServerResponse)
    {
        sock = sock.socket;
    }
    // a request/response without an attached socket (not assigned yet, or
    // already detached) has nothing to close
    if (!sock)
        return;
    // copy from util/net.js
    if (sock.writable || close)
        sock.end();
    const destroy = ()=>{
        if (!sock.destroyed && typeof sock.destroy == 'function')
            sock.destroy(e);
    };
    if (sock._writableState&&sock._writableState.finished)
        destroy();
    else if (!sock.destroyed_soon)
        setTimeout(destroy, 10*date.ms.SEC);
    sock.destroyed_soon = (sock.destroyed_soon|0)+1;
};
// Turn on the emitClose flag of a socket and of whichever of its
// writable/readable stream states exist.
E.ensure_socket_emit_close = sock=>{
    sock.emitClose = true;
    for (const state of [sock._writableState, sock._readableState])
    {
        if (state)
            state.emitClose = true;
    }
};
// true when req is a GET request asking to upgrade to websocket.
// The Upgrade header is looked up regardless of the case of its name, and
// its value is compared case-insensitively as well. A missing request or a
// request without headers yields false. Always returns a boolean.
E.is_ws_upgrade_req = req=>{
    if (!req || req.method!='GET')
        return false;
    const headers = req.headers||{};
    const upgrade_h = Object.keys(headers).find(
        h=>h.toLowerCase()=='upgrade');
    if (!upgrade_h)
        return false;
    return String(headers[upgrade_h]).trim().toLowerCase()=='websocket';
};
// Resolve a network interface to an IPv4 address.
// A value that already looks like a dotted-quad address is returned as is.
// Otherwise the interface with that name is looked up and its first IPv4
// address returned. Returns false when the interfaces cannot be listed,
// the name is unknown or the interface has no IPv4 address.
E.find_iface = iface=>{
    if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(iface))
        return iface;
    let ifaces;
    try {
        ifaces = os.networkInterfaces();
    } catch(e){
        return false;
    }
    const name = Object.keys(ifaces).find(n=>n==iface);
    if (name===undefined)
        return false;
    const ipv4 = ifaces[name].find(data=>data.family=='IPv4');
    return ipv4 ? ipv4.address : false;
};
// List running node processes that belong to the proxy manager.
// opt.all_processes: match any command line mentioning lpm, luminati-proxy,
//   pmgr or proxy-manager instead of only lum_node.js
// opt.current_pid: keep only this process and its direct children; without
//   it exactly those are excluded
// The 'ps-list' module is imported on first use and kept in ps_list.
E.get_lpm_tasks = (opt={})=>etask(function*(){
    const cmd_re = opt.all_processes
        ? /.*(lpm|luminati-proxy|pmgr|proxy-manager).*/
        : /.*lum_node\.js.*/;
    if (!ps_list)
        yield import('ps-list').then(m=>ps_list = m.default);
    const tasks = yield ps_list();
    const is_own = t=>t.ppid==process.pid || t.pid==process.pid;
    const pid_matches = t=>opt.current_pid ? is_own(t) : !is_own(t);
    return tasks.filter(t=>t.name.includes('node') && cmd_re.test(t.cmd)
        && pid_matches(t));
});
// Best-effort JSON fetch of endpoint: yields the E.json result, or an empty
// object when the request fails for any reason.
// The user-agent header and the 20 second timeout are passed inside the
// single options object, because E.json as defined in this file reads one
// options argument only - a second argument would be silently dropped.
E.fetch = endpoint=>etask(function*(){
    try {
        return yield E.json({
            url: endpoint,
            headers: {'user-agent': E.user_agent},
            timeout: 20*date.ms.SEC,
        });
    } catch(e){
        // deliberate: callers get {} instead of an error
        return {};
    }
});
// Fetch the server configuration from the API and versions.json from the
// github repository, both at once.
// Yields the body of the API answer extended with:
// - newer: truthy when the API reports a version above the running one
// - versions: body of versions.json, or [] when it has none
E.get_last_version = api_domain=>etask(function*(){
    const conf_opt = {
        url: `https://${api_domain}/lpm/server_conf`,
        qs: {md5: pkg.lpm.md5, ver: pkg.version},
    };
    const versions_opt = {url: 'https://raw.githubusercontent.com/'
        +lpm_config.github_repo+'/master/versions.json'};
    const [conf={}, versions={}] = yield etask.all([E.json(conf_opt),
        E.json(versions_opt)]);
    const latest = conf.body?.ver;
    const newer = latest && semver.lt(pkg.version, latest);
    return assign({newer, versions: versions.body||[]}, conf.body);
});
// Build a multi-line status report from a process list.
// The first task whose command contains 'lum_node.js' is reported as the
// manager (pid, cpu, memory); every task whose command contains 'worker.js'
// gets one further line. Memory is derived from the task's memory percentage
// and the total system memory.
E.get_status_tasks_msg = tasks=>{
    const total_mem_mb = os.totalmem()/1000000;
    const fmt_num = n=>
        (+n).toLocaleString('en-GB', {maximumFractionDigits: 2});
    const describe = (label, t)=>`${label} = CPU: ${fmt_num(t.cpu)}%, `
        +`Memory used: ${fmt_num(t.memory/100*total_mem_mb)}MB`;
    const manager = tasks.find(t=>t.cmd.includes('lum_node.js'))||{};
    const workers = tasks.filter(t=>t.cmd.includes('worker.js'));
    const lines = [
        `PID: ${manager.pid}`,
        describe('Manager (lum_node.js)', manager),
    ];
    workers.forEach((w, i)=>
        lines.push(describe(`Worker ${i} (worker.js)`, w)));
    return lines.join('\n');
};
// 'host:proxy_port' of a context object
E.get_host_port = ({host, proxy_port})=>`${host}:${proxy_port}`;
// user-facing name of a platform; platforms without an alias are returned
// unchanged
E.format_platform = platform=>{
    const aliases = {win32: 'windows', darwin: 'mac'};
    return aliases[platform]||platform;
};
// Report an event through zerr.perr, adding the host name as 'zagent' to
// the info (a 'zagent' already present in info wins).
// Nothing is reported, and undefined is returned, when a global 'it' exists.
E.perr = function(id, info={}, opt={}){
    const payload = assign({zagent: os.hostname()}, info);
    return global.it ? undefined : zerr.perr(id, payload, opt);
};
// Map obj through zutil.reduce_obj keeping every key, with the value of each
// entry for which fn(value, key) is truthy replaced by undefined.
E.omit_by = (obj, fn)=>zutil.reduce_obj(obj, (v, k)=>k, (v, k)=>{
    if (fn(v, k))
        return undefined;
    return v;
});
// Omit from obj what carries no information compared to defaults:
// - empty arrays
// - objects (and other arrays) deeply equal to their default
// - other values strictly equal to their default, unless the key is one of
//   lpm_config.mgr_proxy_shared_fields
E.omit_defaults = (obj, defaults)=>E.omit_by(obj, (v, k)=>{
    if (Array.isArray(v) && !v.length)
        return true;
    if (typeof v=='object')
        return zutil.equal_deep(v, defaults[k]);
    const shared = lpm_config.mgr_proxy_shared_fields.includes(k);
    return !shared && v===defaults[k];
});
// module-private random integer between start and end, produced by rounding
// a uniformly distributed value taken from that interval
const rand_range = (start=0, end=1)=>{
    const span = end-start;
    return Math.round(start+Math.random()*span);
};
// Request helpers.
// is_ssl: true when the request is flagged is_mitm_req
// is_connect: true for an HTTP CONNECT request
// full_url: req.url as is for a non-ssl request; otherwise an https url
//   whose host is the client's servername (plus the port found in the Host
//   header) or, without a servername, the Host header itself
// gen_id: returns id, generating 'r-0-<random>' when it is missing, with
//   the middle segment replaced by retry when retry is truthy
// get_domain: domain of req.url, else req.ctx.domain, else the domain of
//   req.ctx.url
// get_remote_ip: first available of the forwarded-for marks and remote
//   addresses found on the request's socket chain, or null
E.req_util = {
    is_ssl: req=>Boolean(req.is_mitm_req),
    is_connect: ({method})=>method=='CONNECT',
    full_url: req=>{
        if (!E.req_util.is_ssl(req))
            return req.url;
        const servername = req.client.servername;
        const host = req.headers.host;
        const port_match = host.match(/:\d+/);
        const port = port_match ? port_match[0] : '';
        const authority = servername ? servername+port : host;
        const rest = req.url.replace(/^(https?:\/\/[^/]+)?\//,
            authority+'/');
        return `https://${rest}`;
    },
    gen_id: (id, retry)=>{
        const base = id ? id : 'r-0-'+rand_range(1, 1000000);
        return retry ? base.replace(/-[0-9]*-/, `-${retry}-`) : base;
    },
    get_domain: req=>{
        const from_url = E.url2domain(req.url);
        if (from_url || !req.ctx)
            return from_url;
        if (req.ctx.domain)
            return req.ctx.domain;
        return req.ctx.url ? E.url2domain(req.ctx.url) : from_url;
    },
    get_remote_ip: req=>{
        const {socket} = req;
        if (!socket)
            return null;
        // ordered by priority: forwarded-for marks first, then addresses
        const candidates = [
            socket._parent?.lpm_forwarded_for,
            socket.lpm_forwarded_for,
            socket.socket?.lpm_forwarded_for,
            socket.remoteAddress,
            socket.socket?.remoteAddress,
        ];
        return candidates.find(Boolean)||null;
    },
};
// Response helpers.
// is_one_of_types: true when the response's content-type header contains at
//   least one of the given type strings (a response without headers or
//   without a content-type matches nothing)
E.res_util = {
    is_one_of_types: (res, types)=>{
        const content_type = (res.headers||{})['content-type']||'';
        return types.some(t=>content_type.includes(t));
    },
};
// Swagger helpers.
// get_ep: swagger definition registered for the request's exact url and
//   lower-cased method; falsy when the url or the method is not described
E.swagger_util = {
    get_ep: req=>{
        const path = swagger.paths[req.url];
        return path && path[req.method.toLowerCase()];
    },
};
// http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt v1
// Transform stream that consumes a PROXY protocol v1 header line from the
// start of the data, emits 'parsed' with {remote_ip} (the third
// space-separated field of that line) and passes everything after the line
// through unchanged.
// The stream is destroyed with 'Malformed proxy protocol' when the line does
// not start with 'PROXY', announces 'UNKNOWN', carries no source address, or
// when 108 bytes arrive without a line end.
class Lb_transform extends stream.Transform {
    constructor(){
        super({});
        // bytes received so far while waiting for the end of the header line
        this.lb_data = Buffer.alloc(0);
        // true once the header line has been consumed
        this.finished = false;
    }
    _transform(chunk, encoding, cb){
        if (this.finished)
            this.push(chunk);
        else
        {
            this.lb_data = Buffer.concat([this.lb_data, chunk]);
            const length = this.lb_data.indexOf('\r\n');
            if (length>=0)
            {
                this.finished = true;
                // length is a byte offset, so cut the buffer before decoding
                const client_data = this.lb_data.slice(0, length).toString()
                    .split(' ');
                if (client_data[0]!='PROXY' || client_data[1]=='UNKNOWN' ||
                    !client_data[2])
                {
                    return this.destroy('Malformed proxy protocol');
                }
                this.emit('parsed', {remote_ip: client_data[2]});
                if (this.lb_data.length>length+2)
                    this.push(this.lb_data.slice(length+2));
            }
            else if (this.lb_data.length>=108)
                return this.destroy('Malformed proxy protocol');
        }
        cb();
    }
}
E.Lb_transform = Lb_transform;
// Registry of pending timers that can all be cancelled at once.
class Timeouts {
    constructor(){
        // handles of the timers that have not been cleared yet
        this.timeouts = new Set();
    }
    // run cb once after delay ms; the handle is forgotten when it fires
    set_timeout(cb, delay){
        const timeout = setTimeout(()=>{
            this.timeouts.delete(timeout);
            cb();
        }, delay);
        this.timeouts.add(timeout);
    }
    // run cb every delay ms until clear() is called
    set_interval(cb, delay){
        // unlike a timeout, an interval keeps firing: its handle has to stay
        // registered after a tick, or clear() could never stop it
        const interval = setInterval(()=>cb(), delay);
        this.timeouts.add(interval);
    }
    // cancel every registered timer and interval
    clear(){
        this.timeouts.forEach(clearTimeout);
        this.timeouts.clear();
    }
}
E.Timeouts = Timeouts;
// Timestamps of a request and of its successive attempts.
// req_chain holds one entry per add() call, req points at the latest one.
class Timeline {
    constructor(){
        this.req_chain = [];
    }
    // stamp the current time under name on the latest entry
    // (add() must have been called before)
    track(name){
        this.req[name] = Date.now();
    }
    // time elapsed between 'create' and name on the latest entry; 0 when
    // either is missing or when there is no entry yet
    get_delta(name){
        let metric1 = this.get(name);
        let metric2 = this.get('create');
        if (!metric1||!metric2)
            return 0;
        return metric1-metric2;
    }
    // Value stored under name on entry idx (default: the latest entry).
    // name may be an array, in which case the first name holding a truthy
    // value wins. Returns null when nothing is found, including when there
    // is no such entry. nofb is accepted and ignored.
    get(name, idx, nofb){
        if (typeof idx!='number')
            idx = this.req_chain.length-1;
        const req = this.req_chain[idx];
        if (!req)
            return null;
        if (!Array.isArray(name))
            name = [name];
        for (let i=0; i<name.length; i++)
        {
            if (req[name[i]])
                return req[name[i]];
        }
        return null;
    }
    // start a new entry for port/session, closing the previous entry (its
    // 'end' is set to now) when it is still open
    add(port, session={}){
        const now = Date.now();
        if (this.req && !this.req.end)
            this.req.end = now;
        this.req = {create: now, port, session: session.session||'no session'};
        this.req_chain.push(this.req);
    }
}
E.Timeline = Timeline;
// true for a string that starts like a number (optional sign, optional dot,
// then a digit) and holds nothing but digits, dots and signs
const coercer_is_numeric_str = v=>
    /^[+-]?\.?[0-9]/.test(v) && !/[^0-9.+-]/.test(v);
// Value coercers, addressed as E.coercers[from_type][to_type].
// A coercer gives its input back unchanged when it cannot convert it.
// E.coercers.default(to_type) builds a generic converter for the pairs that
// have no dedicated coercer; an unknown to_type converts nothing.
E.coercers = {
    number: {
        boolean: v=>v===0 ? false : v===1 ? true : v,
        date: v=>date(v),
    },
    string: {
        boolean: v=>{
            if (v==='' || v=='false' || v=='0')
                return false;
            return v=='true' || v=='1' ? true : v;
        },
        number: v=>{
            if (v==='')
                return 0;
            return coercer_is_numeric_str(v) ? Number.parseFloat(v) : v;
        },
        array: v=>v==='' ? [] : v.split(/[\s,]+/).filter(x=>!!x),
        date: v=>{
            let parsed = null;
            if (v!=='')
            {
                // numeric strings are read as a number before conversion
                parsed = date(coercer_is_numeric_str(v)
                    ? Number.parseFloat(v) : v);
            }
            return parsed && Number.isFinite(+parsed) ? parsed : v;
        },
        is: {
            boolean: v=>v=='false' || v=='true',
        },
    },
    default: to_type=>value=>{
        const converters = new Map([
            ['number', v=>Number(v)],
            ['string', v=>String(v)],
            ['boolean', v=>Boolean(v)],
            ['array', v=>Array.from(v)],
            ['date', v=>date(v)],
        ]);
        const convert = converters.get(to_type);
        return convert ? convert(value) : value;
    },
};
// Map a declared type name to the name used for coercion: 'integer' becomes
// 'number' and 'array' becomes 'object' (matched case-insensitively); any
// other name is returned exactly as given.
E.alias_type = type=>{
    const lower = type.toLowerCase();
    if (lower==='integer')
        return 'number';
    if (lower==='array')
        return 'object';
    return type;
};
// dedicated coercer registered for the from/to pair, or the generic
// converter for 'to' when there is none
E.get_coercer = (from, to)=>{
    const by_source = E.coercers[from];
    return by_source && by_source[to] || E.coercers.default(to);
};
// Recursively normalize, in place, the scalar values of an object tree:
// - a value parseInt can read is replaced by that integer
// - 'true'/'false' strings become booleans
// - a string 'active' field goes through the string->boolean coercer
// Arrays are walked item by item and nested objects are descended into.
// null and other non-object inputs (including such array items) are left
// alone.
E.try_int_r = obj=>{
    // typeof null is 'object': without this guard a null value anywhere in
    // the tree would crash on the key enumeration below
    if (obj===null || typeof obj!='object')
        return;
    Object.keys(obj).forEach(k=>{
        if (Array.isArray(obj[k]))
            return obj[k].forEach(item=>E.try_int_r(item));
        if (typeof obj[k] == 'object')
            return E.try_int_r(obj[k]);
        let int = parseInt(obj[k]);
        if (!isNaN(int))
            obj[k] = int;
        if (E.coercers.string.is.boolean(obj[k]))
            obj[k] = E.coercers.string.boolean(obj[k]);
        if (obj.active != undefined && typeof obj.active == 'string')
            obj.active = E.coercers.string.boolean(obj.active);
    });
};
// string spellings of the two boolean values
E.bool_str = ['true', 'false'];
// Helpers for the S3 log export settings.
// get_name_format: strftime-like object name 'brd_[test_]<time>.log[.gz]';
//   with group_by_day the date moves into a '<date>/' folder prefix
// get_tags: {type, project} holding only the tags that were given
// get_target: target folder without a leading '/' and with a trailing '/'
// get_config: credentials object built from access_key and secret_key
// validate: throws '<field> is required' for the first missing required field
// prepare_opt: converts 'true'/'false' option strings to booleans, in place
E.s3 = {
    default_target: 'logs/pm/',
    required_fields: ['bucket', 'access_key', 'secret_key'],
    get_name_format: ({instant, compress, group_by_day})=>{
        let fmt = group_by_day ? '%Y-%m-%d/' : '';
        fmt += 'brd_';
        if (instant)
            fmt += 'test_';
        fmt += group_by_day ? '%H:%M' : '%Y-%m-%d_%H:%M';
        fmt += '.log';
        if (compress)
            fmt += '.gz';
        return fmt;
    },
    get_tags: ({tag_type, tag_project})=>{
        const tags = {};
        if (tag_type)
            tags.type = tag_type;
        if (tag_project)
            tags.project = tag_project;
        return tags;
    },
    get_target: ({target=E.s3.default_target})=>{
        const relative = target.startsWith('/') ? target.substring(1)
            : target;
        return relative.endsWith('/') ? relative : relative+'/';
    },
    get_config: ({access_key: accessKeyId, secret_key: secretAccessKey})=>({
        credentials: {accessKeyId, secretAccessKey},
    }),
    validate: opt=>{
        for (const field of E.s3.required_fields)
        {
            if (!opt[field])
                throw new Error(`${field} is required`);
        }
    },
    prepare_opt: opt=>{
        for (const k of Object.keys(opt))
        {
            if (E.bool_str.includes(opt[k]))
                opt[k] = E.coercers.string.boolean(opt[k]);
        }
    },
};
// HTTP status codes treated as redirects
const REDIRECT_STATUSES = [301, 302, 303, 307, 308];
// names of the url parts listed for redirects
E.REDIRECT_PARTS = ['protocol', 'hostname', 'pathname', 'port'];
// true when status is one of REDIRECT_STATUSES (numbers only: the string
// '301' does not match)
E.is_redirect_status = status=>REDIRECT_STATUSES.indexOf(status)!==-1;
// ports of a proxies collection: the 'port' of every item for an array, the
// keys for an object
E.get_ports = p=>{
    if (Array.isArray(p))
        return p.map(({port})=>port);
    return Object.keys(p);
};
// user-facing name of the platform this process runs on
E.UOS = E.format_platform(os.platform());
// true only for a RegExp instance that redos_detector reports as safe;
// anything that is not a RegExp yields false
E.validate_regexp_safe = regexp=>{
    if (!(regexp instanceof RegExp))
        return false;
    return redos_detector.isSafe(regexp).safe===true;
};