UNPKG

@luminati-io/luminati-proxy

Version:

A configurable local proxy for brightdata.com

583 lines (524 loc) 16.8 kB
// LICENSE_CODE ZON ISC
'use strict'; /*jslint node:true, browser:true*/
/* URL, host, IP and e-mail string helpers.
 * The module is registered through an AMD-style define(): in node and
 * react-native the loader comes from ./require_node.js, elsewhere it is taken
 * from self.define. All helpers are exported as properties of a single
 * object (E). */
(function(){
var define;
var is_node = typeof module=='object' && module.exports && module.children;
var is_rn = (typeof global=='object' && !!global.nativeRequire) ||
    (typeof navigator=='object' && navigator.product=='ReactNative');
var is_ff_addon = typeof module=='object' && module.uri
    && !module.uri.indexOf('resource://');
// node's querystring module; only loaded in node, used by qs_parse(q, bin)
var qs;
if (is_rn)
    define = require('./require_node.js').define(module, '../');
else if (!is_node)
    define = self.define;
else
{
    define = require('./require_node.js').define(module, '../');
    // XXX arik HACK: react-native bundler will try to require querystring
    // even though it never reaches this if (it is done in pre-processing)
    // so we fool him
    var _require = require;
    qs = _require('querystring');
}
define([], function(){
var assign = Object.assign;
var E = {};

// normalize backslashes to forward slashes
function replace_slashes(url){ return url.replace(/\\/g, '/'); }

// prepend 'http://' unless url already starts with 'proto://' or '//'
E.add_proto = function(url){
    if (!url.match(/^([a-z0-9]+:)?\/\//i))
        url = 'http://'+url;
    return url;
};

// turn a protocol-relative url ('//host/..') into an absolute one, using
// 'http:' in node and location.protocol elsewhere
E.rel_proto_to_abs = function(url){
    var proto = is_node ? 'http:' : location.protocol;
    return url.replace(/^\/\//, proto+'//');
};

// last dot-separated label of host, or '' when host has no dot
E.get_top_level_domain = function(host){
    var n = host.match(/\.([^.]+)$/);
    return n ? n[1] : '';
};

// host part of url; the url must have a '/' after the host, otherwise ''
E.get_host = function(url){
    var n = replace_slashes(url).match(/^(https?:)?\/\/([^\/]+)\/.*$/);
    return n ? n[2] : '';
};

// strip a 2-3 letter tld (optionally a two-part one) from a dot-less name
E.get_host_without_tld = function(host){
    return host.replace(/^([^.]+)\.[^.]{2,3}(\.[^.]{2,3})?$/, '$1');
};

// labels that, when found in second-to-last position, make the root domain
// three labels long (abc.com.tw)
var generic_2ld = {com: 1, biz: 1, net: 1, org: 1, xxx: 1, edu: 1, gov: 1,
    ac: 1, co: 1, or: 1, ne: 1, kr: 1, jp: 1, jpn: 1, cn: 1};

// root (registrable-looking) part of domain; IPs are returned unchanged.
// A trailing '.<cc>.hola' or '.<cc>.<digits>.hola' suffix is skipped.
E.get_root_domain = function(domain){
    if (E.is_ip(domain))
        return domain;
    var s = domain.split('.'), root = s, len = s.length;
    if (len>2) // www.abc.com abc.com.tw www.abc.com.tw,...
    {
        var hd = 0;
        if (s[len-1]=='hola')
        {
            hd = 2; // domain.us.hola
            if (s[len-2].match(/^\d+$/))
                hd = 3; // domain.us.23456.hola
        }
        if (generic_2ld[s[len-2-hd]])
            root = s.slice(-3-hd, len-hd); // abc.com.tw
        else
            root = s.slice(-2-hd, len-hd); // abc.com
    }
    return root.join('.');
};

// root domain plus up to (level-1) sub-domain labels in front of it; a
// leading 'www' label is kept aside and re-added unless strip_www is set
E.get_nth_level_domain = function(domain, level, strip_www){
    if (E.is_ip(domain))
        return domain;
    var root = E.get_root_domain(domain);
    var sub = domain.replace(root, '').split('.')
        .filter(function(s){ return s; });
    var www = '';
    if (sub[0]=='www')
        www = sub.shift()+'.';
    sub = sub.length-level+1>0 ? sub.slice(sub.length-level+1) : sub;
    sub = sub.join('.');
    if (sub)
        sub += '.';
    if (!strip_www)
        sub = www+sub;
    return sub+root;
};

// XXX josh: move to email.js:get_domain
// lower-cased part after '@', or null when email does not match
E.get_domain_email = function(email){
    // XXX viktor: /^[\p{L}0-9_.\-+*%!]+@(.*)$/u works only in ES9
    var match = String(email||'').toLowerCase()
        .match(/^[a-z0-9_.\-+*%!ö]+@(.*)$/);
    return match && match[1];
};

// XXX josh: move to email.js:get_root_domain or remove and let developer
// combine email.js:get_domain with url.js:get_root_domain
E.get_root_domain_email = function(email){
    var domain = E.get_domain_email(email);
    return domain && E.get_root_domain(domain);
};

// everything from the first '/' after the host of an http(s) url, else ''
E.get_path = function(url){
    var n = url.match(/^https?:\/\/[^\/]+(\/.*$)/);
    return n ? n[1] : '';
};

// pathname of str; a leading '/' is added first unless str already starts
// with '/' or 'http'
E.to_path = function(str){
    var url = str;
    if (url[0] != '/' && !str.startsWith('http'))
        url = '/' + url;
    return E.parse(url).pathname;
};

// protocol name without ':' (lower-case urls only), or ''
E.get_proto = function(url){
    var n = url.match(/^([a-z0-9]+):\/\//);
    return n ? n[1] : '';
};

// like get_host but protocol, '//' and trailing path are all optional
E.get_host_gently = function(url){
    var n = replace_slashes(url).match(/^(?:(?:[a-z0-9]+?:)?\/\/)?([^\/]+)/);
    return n ? n[1] : '';
};

// true for a dotted-quad with every octet in 0..255
E.is_ip = function(host){
    var m = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
    if (!m)
        return false;
    for (var i=1; i<=4; i++)
    {
        if (+m[i]>255)
            return false;
    }
    return true;
};

// true for a dotted-quad netmask: non-zero, 255 octets first, then at most
// one partial octet made of leading one-bits, then only 0 octets.
// '255.255.255.255' is rejected (no octet below 255 is ever seen).
E.is_ip_mask = function(host){
    var m = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
    if (!m)
        return false;
    if (E.ip2num(host)==0)
        return false;
    var final = false;
    var check_num_mask = function(num){
        // left-pad to 8 bits: toString(2) drops leading zeros, which used
        // to let octets such as 1 or 127 pass as "ones followed by zeros"
        var bits = ('0000000'+(num>>>0).toString(2)).slice(-8);
        return /^1*0*$/.test(bits);
    };
    for (var i=1; i<=4; i++)
    {
        if (+m[i]>255)
            return false;
        if (final && +m[i]>0)
            return false;
        if (!final && +m[i]<255)
        {
            if (!check_num_mask(+m[i]))
                return false;
            final = true;
        }
    }
    return !!final;
};

// dotted-quad string to unsigned 32-bit number
E.ip2num = function(ip){
    var num = 0;
    ip.split('.').forEach(function(octet){
        num <<= 8;
        num += +octet;
    });
    return num>>>0;
};

// 32-bit number to dotted-quad string
E.num2ip = function(num){
    return (num>>>24)+'.'+(num>>16 & 255)+'.'+(num>>8 & 255)+'.'+(num & 255);
};

// replace the last octet of ip with '0/24'
E.get_subnet24 = function(ip){
    return ip.substr(0, ip.lastIndexOf('.'))+'.0/24';
};

// truthy for '<ip>/<n>' with n<=32
E.is_ip_subnet = function(host){
    var m = /(.+?)\/(\d+)$/.exec(host);
    return m && E.is_ip(m[1]) && +m[2]<=32;
};

// replace every '_' with '.' and append '/24'
E.cbl_key2sub = function(ip){
    return ip.replace(/_/g, '.')+'/24';
};

// true for '<ip>/<dotted-quad netmask>'
E.is_ip_netmask = function(host){
    if (!host || typeof host.split !== 'function')
        return false;
    var ips = host.split('/');
    if (ips.length!=2 || !E.is_ip(ips[0]) || !E.is_ip_mask(ips[1]))
        return false;
    return true;
};

// true for '<ip>-<ip>' where the first ip is strictly lower than the second
E.is_ip_range = function(host){
    // same guard as is_ip_netmask: null/undefined are "not a range" rather
    // than a TypeError
    if (!host || typeof host.split !== 'function')
        return false;
    var ips = host.split('-');
    if (ips.length!=2 || !E.is_ip(ips[0]) || !E.is_ip(ips[1]))
        return false;
    return E.ip2num(ips[0])<E.ip2num(ips[1]);
};

// truthy for '<ip>' or '<ip>:<port>' with port<=65535
E.is_ip_port = function(host){
    var m = /(.+?)(?::(\d{1,5}))?$/.exec(host);
    return m && E.is_ip(m[1]) && !(+m[2]>65535);
};

/* basic url validation to prevent script injection like 'javascript:....' */
E.is_valid_url = function(url){
    return /^(https?:\/\/)?([a-z0-9-]+\.)+[a-z0-9-]+(:\d+)?(\/.*)?$/i
    .test(url);
};

// lower-case dotted name ending in a 2-63 letter label
E.is_valid_domain = function(domain){
    return /^([a-z0-9]([a-z0-9-_]*[a-z0-9])?\.)+[a-z]{2,63}$/.test(domain);
};

// is_valid_url that also requires an explicit http:// or https:// prefix
E.is_http_url = function(url){
    return /^https?:\/\//.test(url) && E.is_valid_url(url);
};

// XXX josh: migrate callers to email.js:is_valid and drop
// XXX josh: is_signup is a nonsense flag for this fn, migrate callers to using
// email.js:is_valid_signup_email()
// With is_signup set, more than one '+' in the local part is rejected.
E.is_valid_email = function(email, is_signup){
    if (!email || typeof email!='string')
        return false;
    var re = /^[a-z0-9_\-+*]+(?:\.[a-z0-9_\-+*]+)*@(.*)$/;
    var n = email.toLowerCase().match(re);
    if ((n&&is_signup&&email.split('@')[0].match(/\+/g)||[]).length>1)
        return false;
    return !!(n && E.is_valid_domain(n[1]));
};

// first whitespace-separated token of email that is a valid e-mail, or
// undefined when there is none
E.get_first_valid_email = function(email){
    // do not pass is_valid_email directly: find() calls its callback with
    // (value, index, array) and the index would be taken as is_signup,
    // making validation stricter for every token but the first
    return email.split(/\s+/).find(function(candidate){
        return E.is_valid_email(candidate); });
};

// XXX dmitriie: move to email.js:is_alias
// true for a valid e-mail whose local part has a '+' with text on both sides
E.is_alias_email = function(email){
    if (!E.is_valid_email(email))
        return false;
    var n = email.toLowerCase().match(/^([a-z0-9_.\-+*]+)@.*$/);
    return !!(n && /.+\+.+/.test(n[1]));
};

// XXX vadimr: move to email.js:is_need_sanitize
E.is_email_need_sanitize = function(email){
    var valid_domains = ['gmail.com', 'googlemail.com', 'protonmail.ch',
        'protonmail.com'];
    return valid_domains.indexOf(E.get_domain_email(email)) !== -1;
};

// XXX vadimr: move to email.js:sanitize
// main e-mail (alias removed) with all dots dropped from the local part;
// undefined for an invalid e-mail
E.sanitize_email = function(email){
    var main = E.get_main_email(email);
    if (!main)
        return;
    var sp = main.split('@');
    return sp[0].replace(/\.*/g, '')+'@'+sp[1];
};

// XXX dmitriie: move to email.js:get_main
// e-mail with the '+alias' part removed; undefined for an invalid e-mail
E.get_main_email = function(email){
    if (!E.is_valid_email(email))
        return;
    if (E.is_alias_email(email))
        return email.replace(/\+.+@/, '@');
    return email;
};

// true when ip lies inside the inclusive '<ip>-<ip>' range
E.is_ip_in_range = function(ips_range, ip){
    if (!E.is_ip_range(ips_range) || !E.is_ip(ip))
        return false;
    var ips = ips_range.split('-');
    var min_ip = E.ip2num(ips[0]), max_ip = E.ip2num(ips[1]);
    var num_ip = E.ip2num(ip);
    return num_ip>=min_ip && num_ip<=max_ip;
};

// true for 10.x.x.x, 172.16-31.x.x, 192.168.x.x and 169.254.x.x addresses
E.is_ip_local = function(ip){
    return E.is_ip_in_range('10.0.0.0-10.255.255.255', ip) ||
        E.is_ip_in_range('172.16.0.0-172.31.255.255', ip) ||
        E.is_ip_in_range('192.168.0.0-192.168.255.255', ip) ||
        E.is_ip_in_range('169.254.0.0-169.254.255.255', ip);
};

// look host up in the lookup object, retrying with the leftmost label
// removed each time; returns the first truthy value found, or undefined
E.host_lookup = function(lookup, host){
    var pos, res;
    while (1)
    {
        if (res = lookup[host])
            return res;
        if ((pos = host.indexOf('.'))<0)
            return;
        host = host.slice(pos+1);
    }
};

// more-or-less compatible with NodeJS url API
E.uri_obj_href = function(uri){
    return (uri.protocol||'')+(uri.slashes ? '//' : '')
        +(uri.host ? (uri.auth ? uri.auth+'@' : '')+uri.host : '')
        +uri.path
        +(uri.hash||'');
};

var protocol_re = /^((?:about|http|https|file|ftp|ws|wss):)?(\/\/)?/i;
var host_section_re = /^(.*?)(?:[\/?#]|$)/;
var host_re = /^(?:(([^:@]*):?([^:@]*))?@)?([a-zA-Z0-9._+-]*)(?::(\d*))?/;
var path_section_re = /^([^?#]*)(\?[^#]*)?(#.*)?$/;
var path_re_loose = /^(\/(?:.(?![^\/]*\.[^\/.]+$))*\/?)?([^\/]*?(?:\.([^.]+))?)$/;
var path_re_strict = /^(\/(?:.(?![^\/]*(?:\.[^\/.]+)?$))*\/?)?([^\/]*?(?:\.([^.]+))?)$/;

// Split url into {orig, protocol, slashes, authority, auth, user, password,
// hostname, port, host, relative, pathname, search, query, hash, directory,
// file, ext, path, href}. Parts that are absent are null. Returns {} when
// url is empty (outside node an empty url falls back to location.href) or
// when one of the section regexps does not match.
// strict selects path_re_strict instead of path_re_loose for splitting the
// pathname into directory/file.
E.parse = function(url, strict){
    // exec() wrapper: null on failure, undefined groups turned into null
    function re(expr, str){
        var m;
        try { m = expr.exec(str); } catch(e){ m = null; }
        if (!m)
            return m;
        for (var i=0; i<m.length; i++)
            m[i] = m[i]===undefined ? null : m[i];
        return m;
    }
    if (!(url = url || !is_node&&location.href))
        return {};
    var uri = {orig: url};
    url = replace_slashes(url);
    var m, remaining = url;
    // protocol
    if (!(m = re(protocol_re, remaining)))
        return {};
    uri.protocol = m[1];
    if (uri.protocol!==null)
        uri.protocol = uri.protocol.toLowerCase();
    uri.slashes = !!m[2];
    if (!uri.protocol && !uri.slashes)
    {
        uri.protocol = 'http:';
        uri.slashes = true;
    }
    remaining = remaining.slice(m[0].length);
    // host
    if (!(m = re(host_section_re, remaining)))
        return {};
    uri.authority = m[1];
    remaining = remaining.slice(m[1].length);
    // host elements
    if (!(m = re(host_re, uri.authority)))
        return {};
    uri.auth = m[1];
    uri.user = m[2];
    uri.password = m[3];
    uri.hostname = m[4];
    uri.port = m[5];
    if (uri.hostname!==null)
    {
        uri.hostname = uri.hostname.toLowerCase();
        uri.host = uri.hostname+(uri.port ? ':'+uri.port : '');
    }
    // path
    if (!(m = re(path_section_re, remaining)))
        return {};
    uri.relative = m[0];
    uri.pathname = m[1];
    uri.search = m[2];
    uri.query = uri.search ? uri.search.substring(1) : null;
    uri.hash = m[3];
    // path elements
    if (!(m = re(strict ? path_re_strict : path_re_loose, uri.pathname)))
        return {};
    uri.directory = m[1];
    uri.file = m[2];
    uri.ext = m[3];
    if (uri.file=='.'+uri.ext)
        uri.ext = null;
    // finals
    if (!uri.pathname)
        uri.pathname = '/';
    uri.path = uri.pathname+(uri.search||'');
    uri.href = E.uri_obj_href(uri);
    return uri;
};

// Parse a query string (without the leading '?') into an object; repeated
// keys are collected into arrays.
// bin: decode with querystring.unescapeBuffer into a 'binary' string (needs
//   the node querystring module loaded above)
// safe: return the raw value instead of throwing on malformed %-escapes
E.qs_parse = function(q, bin, safe){
    var obj = {};
    q = q.length ? q.split('&') : [];
    var len = q.length;
    var unescape_val = bin ? function(val){
        return qs.unescapeBuffer(val, true).toString('binary');
    } : safe ? function(val){
        try { return decodeURIComponent(val.replace(/\+/g, ' ')); }
        catch(e){ return val; }
    } : function(val){
        return decodeURIComponent(val.replace(/\+/g, ' '));
    };
    for (var i = 0; i<len; ++i)
    {
        var x = q[i];
        var idx = x.indexOf('=');
        var kstr = idx>=0 ? x.substr(0, idx) : x;
        var vstr = idx>=0 ? x.substr(idx + 1) : '';
        var k = unescape_val(kstr);
        var v = unescape_val(vstr);
        if (obj[k]===undefined)
            obj[k] = v;
        else if (Array.isArray(obj[k]))
            obj[k].push(v);
        else
            obj[k] = [obj[k], v];
    }
    return obj;
};

// anchor regexp source s with ^...$ when end is set
function token_regex(s, end){ return end ? '^'+s+'$' : s; }

// regexp source for a host glob ('*' and '**' wildcards, optional ':port'
// where '*' stands for any digits)
E.http_glob_host = function(host, end){
    var port = '';
    var parts = host.split(':');
    host = parts[0];
    if (parts.length>1)
        port = ':'+parts[1].replace('*', '[0-9]+');
    var n = host.match(/^(|.*[^*])(\*+)$/);
    if (n)
    {
        host = E.http_glob_host(n[1])
            +(n[2].length==1 ? '[^./]+' : '[^/]'+(n[1] ? '*' : '+'));
        return token_regex(host+port, end);
    }
    /* '**' replace doesn't use '*' in output to avoid conflict with '*'
     * replace following it */
    host = host.replace(/\*\*\./, '**').replace(/\*\./, '*')
        .replace(/\./g, '\\.').replace(/\*\*/g, '(([^./]+\\.)+)?')
        .replace(/\*/g, '[^./]+\\.');
    return token_regex(host+port, end);
};

// regexp source for a path glob ('*' and '**' wildcards)
E.http_glob_path = function(path, end){
    if (path[0]=='*')
        return E.http_glob_path('/'+path, end);
    var n = path.match(/^(|.*[^*])(\*+)([^*^\/]*)$/);
    if (n)
    {
        path = E.http_glob_path(n[1])+(n[2].length==1 ? '[^/]+' : '.*')+
            E.http_glob_path(n[3]);
        return token_regex(path, end);
    }
    path = path.replace(/\*\*\//, '**').replace(/\*\//, '*')
        .replace(/\//g, '\\/').replace(/\./g, '\\.')
        .replace(/\*\*/g, '(([^/]+\\/)+)?').replace(/\*/g, '[^/]+\\/');
    return token_regex(path, end);
};

// regexp source for a '[proto://]host[/path]' glob; a missing or '*'
// protocol matches http and https, a missing path matches anything.
// Returns null when url has no host part.
E.http_glob_url = function(url, end){
    var n = url.match(/^((.*):\/\/)?([^\/]+)(\/.*)?$/);
    if (!n)
        return null;
    var prot = n[1] ? n[2] : '*';
    var host = n[3];
    var path = n[4]||'**';
    if (prot=='*')
        prot = 'https?';
    host = E.http_glob_host(host);
    path = E.http_glob_path(path);
    return token_regex(prot+':\\/\\/'+host+path, end);
};

// strip leading '*'/'.' from both arguments, then test the longer literal
// (as 'http://<literal>/') against the other argument compiled as a url glob
E.root_url_cmp = function(a, b){
    var a_s = a.match(/^[*.]*([^*]+)$/);
    var b_s = b.match(/^[*.]*([^*]+)$/);
    if (!a_s && !b_s)
        return false;
    var re, s;
    if (a_s && b_s && a_s[1].length>b_s[1].length || a_s && !b_s)
    {
        s = a_s[1];
        re = b;
    }
    else
    {
        s = b_s[1];
        re = a;
    }
    s = E.add_proto(s)+'/';
    if (!(re = E.http_glob_url(re, 1)))
        return false;
    try { re = new RegExp(re); }
    catch(e){ return false; }
    return re.test(s);
};

// url up to (not including) the first '?' or '#'
E.qs_strip = function(url){ return /^[^?#]*/.exec(url)[0]; };

// mini-implementation of zescape.qs to avoid dependency of escape.js
E.qs_str = function(qs){
    var q = [];
    for (var k in qs)
    {
        (Array.isArray(qs[k]) ? qs[k] : [qs[k]]).forEach(function(v){
            q.push(encodeURIComponent(k)+'='+encodeURIComponent(v)); });
    }
    return q.join('&');
};

// url with the keys of qs merged into (and overriding) its query string
E.qs_add = function(url, qs){
    var u = E.parse(url), q = assign(u.query ? E.qs_parse(u.query) : {}, qs);
    var query = E.qs_str(q);
    u.path = u.pathname+(query ? '?'+query : '');
    return E.uri_obj_href(u);
};

// url with every key listed in the qs array removed from its query string
E.qs_remove = function(url, qs){
    var u = E.parse(url), q = assign(u.query ? E.qs_parse(u.query) : {});
    qs.forEach(function(query){ delete q[query]; });
    var query = E.qs_str(q);
    u.path = u.pathname+(query ? '?'+query : '');
    return E.uri_obj_href(u);
};

// parsed query string of url (hash removed; {} when url has no '?')
E.qs_parse_url = function(url){
    return E.qs_parse(url.replace(/(^.*\?)|(^[^?]*$)/, '').replace(/#.*$/,''));
};

var INVALID_PATH_REGEX = /[^\u0021-\u00ff]/;
// encodeURI() the path only when it has a char outside \u0021-\u00ff
E.escape_path = function(path){
    return INVALID_PATH_REGEX.test(path) ? encodeURI(path) : path;
};

// drop a leading 'proto://' or '//'
E.remove_protocol = function(url){
    var any_protocol_regex = /(^\w+:|^)\/\//;
    return url.replace(any_protocol_regex, '');
};

// drop a leading 'www.'
E.remove_www = function(url){
    if (!url || !url.startsWith('www.'))
        return url;
    return url.substr(4);
};

// normalized href of url without its '#...' part; falsy url is returned
// as is
E.remove_hash = function(url){
    if (!url)
        return url;
    var parsed = E.parse(url);
    // parse() sets hash to null when there is no fragment; replace(null, '')
    // would coerce it to the text 'null' and cut that out of the href
    if (!parsed.hash)
        return parsed.href;
    return parsed.href.replace(parsed.hash, '');
};

// true when every url in urls has the same root domain as domain, or a
// root domain that ends with '.'+<root domain of domain>
E.is_same_domain = function(domain, urls){
    if (typeof domain !== 'string')
        return false;
    var hostname = E.parse(domain).hostname;
    if (!hostname)
        return false;
    var _domain = E.get_root_domain(hostname);
    for (var i=0; i<urls.length; i++)
    {
        var url_host = E.parse(urls[i]).hostname;
        // an entry without a hostname cannot be on the same domain (and
        // get_root_domain() would throw on null/undefined)
        if (!url_host)
            return false;
        var d = E.get_root_domain(url_host);
        // suffix match only on a label boundary, so that 'notexample.com'
        // is not taken for 'example.com'
        if (!(d==_domain || d.endsWith('.'+_domain)))
            return false;
    }
    return true;
};

return E; }); }());