UNPKG

node-isbot

Version:

isbot for Node.js — contains patterns for most of the world's bots and spiders

972 lines (970 loc) 18.9 kB
module.exports = [ // generic 'analyza', 'analyzer', 'aggregator', 'archive', 'archiving', 'auto', 'bot', 'capture', 'check', 'classify', 'clown', 'collect', 'control', 'crawl', 'deep[-\\s]?link', 'detector', 'download(?:s|er)', 'extract', 'eyes', 'fantom', 'feed', 'fetch', 'finder', 'gather', 'getter', 'gopher', 'hack', 'harvest', 'hound', 'html2', 'http_client', 'images', 'index', 'java/', 'leech', 'library', 'library', 'link check', 'linkman', 'links?\\s?check', 'loader', 'locate', 'locator', 'mack', 'monitor', 'parse', 'perl', 'phantom', 'php/\\d', 'program', 'python', 'rating', 'reader', 'reaper', 'retrieve', 'scan', 'scrape', 'search', 'search[-\\s]?engine', 'seer', 'siphon', 'site[-\\s]?check', 'site[-\\s]?scan', 'sniff', 'somewhere', 'spider', 'spy', 'spyder', 'sweep', 'thumb', 'tracker', 'url', 'utility', 'validator', 'verifier', 'verify', 'warez', 'web[-\\s]?search', 'webinator', 'worth', 'yahoo', // ua specific '!Susie', '/www\\.answerbus\\.com', '/www\\.unchaos\\.com', '/www\\.wmtips\\.com', '008/', '192\\.comAgent', '8484 Boston Project', '<http://www\\.sygol\\.com/>', 'A-Online Search', 'A6-Indexer', 'ADmantX', 'AVSearch', 'Aberja Checkomat', 'Abonti', 'Aboundex', 'Accoona-AI-Agent', 'Ad Muncher', 'AddThis', 'AltaVista Intranet', 'Anemone', 'Anturis Agent', 'Aport', 'AppEngine-Google', 'Arachmo', 'Arachnoidea', 'Arachnophilia/', 'AspTear', 'Avirt Gateway Server', 'Azureus', 'B-l-i-t-z-B-O-T', 'BCKLINKS 1\\.0', 'BMLAUNCHER', 'BStop\\.BravoBrian\\.it Agent Detector', 'BUbiNG', 'BW-C-2', 'B_l_i_t_z_B_O_T', 'BackStreet Browser', 'Big Brother', 'Big Fish', 'BigBozz/', 'BilderSauger', 'BlackWidow', 'BlogPulseLive', 'Blogpulse', 'Bookmark Buddy', 'Bookmark Renewal', 'BorderManager', 'BravoBrian', 'Browsershots', 'BullsEye', 'BunnySlippers', 'Buscaplus', 'Butterfly/', 'CC Metadata Scaper', 'CE-Preload', 'CERN-HTTPD', 'CJB\\.NET Proxy', 'COAST WebMaster Pro/', 'CSE HTML Validator Professional', 'Ceramic Tile Installation Guide', 'Cerberian Drtrs', 'Charlotte', 
'Chat Catcher/', 'CheckWeb', 'China Local Browse', 'Chitika ContentHit', 'Claymont\\.com', 'CloudFlare-AlwaysOnline', 'CoBITSProbe', 'ColdFusion', 'Commons-HttpClient', 'ContentSmartz', 'Covac UPPS Cathan', 'Covario-IDS', 'Custo x\\.x \\(www\\.netwu\\.com\\)', 'CyberPatrol', 'DA \\d', 'DAP x', 'DAUMOA-video', 'DBrowse \\d', 'DDD', 'DISCo Pump x\\.x', 'DNS-Tools Header-Analyzer', 'DSurf15', 'DTAAgent', 'DTS Agent', 'DataparkSearch', 'DepSpid', 'DigOut4U', 'Digg', 'DnloadMage', 'DomainAppender', 'Download Demon', 'Download Druid', 'Download Express', 'Download Master', 'Download Ninja', 'Download Wonder', 'DownloadDirect', 'Download\\.exe', 'DreamPassport', 'Drupal', 'Dual Proxy', 'EARTHCOM', 'EBrowse \\d', 'ESurf15', 'Educate Search VxB', 'EldoS TimelyWeb/', 'ElectricMonk', 'EmailWolf', 'Embedly/', 'Evliya Celebi', 'Exalead', 'Expired Domain Sleuth', 'Exploratodo/', 'ExtractorPro', 'Extreme Picture Finder', 'EyeCatcher', 'FDM \\d', 'FLATARTS_FAVICO', 'FSurf', 'FairAd Client', 'FastBug', 'FavIconizer', 'FavOrg', 'Faveeo/', 'Feedfetcher-Google', 'FindAnISP\\.com', 'FindLinks', 'Flamingo_SearchEngine', 'FlashGet', 'FlipboardRSS/', 'FollowSite', 'FollowSite\\.com', 'FuseBulb\\.Com', 'GTmetrix', 'Genieo', 'GigablastOpenSource', 'Go!Zilla', 'GoBeez', 'GoForIt\\.com', 'GoForIt\\.com', 'Goldfire Server', 'Google Wireless Transcoder', 'GroupHigh/', 'H010818', 'HTTPGet', 'HTTPResume', 'Hatena Mobile Gateway/', 'Hatena Pagetitle Agent/', 'Hatena RSS/', 'HiDownload', 'HitList', 'Holmes', 'HubSpot Marketing Grader', 'HyperixScoop', 'IDA', 'IEFav172Free', 'IODC', 'IOI', 'ISC Systems iRc Search', 'IlTrovatore-Setaccio', 'InAGist', 'InfoSeek Sidewinder/', 'InfoWizards Reciprocal Link System PRO', 'Inktomi Search', 'Insitor\\.com search', 'Insitornaut', 'InstallShield DigitalWizard', 'Internet Ninja', 'InterseekWeb', 'JBH Agent 2\\.0', 'Jack', 'JemmaTheTourist', 'JetCar', 'Journster', 'KDDI-SN22', 'Kapere', 'Kevin', 'KimonoLabs', 'KnowItAll', 'Kontiki Client', 'L\\.webis', 
'Lachesis', 'Larbin', 'LibertyW', 'Lincoln State Web Browser', 'Link Commander', 'Link Valet', 'LinkExaminer', 'LinkPimpin', 'LinkProver', 'LinkStash', 'LinkTiger', 'LinkWalker', 'Linkguard', 'Links2Go', 'Lipperhey Link Explorer', 'Lipperhey SEO Service', 'Lipperhey-Kaus-Australis/', 'Look\\.com', 'Lovel', 'MARTINI', 'MFHttpScan', 'MSIE or Firefox mutant', 'MVAClient', 'Mac Finder', 'MantraAgent', 'MapoftheInternet\\.com', 'Marketwave Hit List', 'Martini', 'Marvin', 'MasterSeek', 'Mata Hari/', 'Mediapartners-Google', 'MegaIndex\\.ru', 'MegaSheep', 'Megite', 'Mercator', 'MetaProducts Download Express', 'MetaURI', 'MicroBaz', 'Microsoft_Internet_Explorer_5', 'Mindjet MindManager', 'Missouri College Browse', 'Mister Pix', 'Mizzu Labs', 'Mnogosearch', 'Mo College', 'MonTools\\.com', 'Morning Paper', 'Mrcgiguy', 'Mulder', 'MuscatFerret', 'MusicWalker2', 'NG-Search', 'NORAD National Defence Network', 'NetMechanic', 'NetSprint', 'Netcraft Web Server Survey', 'NetcraftSurveyAgent/', 'NewsGator', 'Norton-Safeweb', 'Notifixious', 'NutchCVS', 'Nymesis', 'ODP links', 'OSSProxy', 'Octopus', 'Octora Beta', 'OliverPerry', 'Onet\\.pl', 'Online Website Link Checker', 'Oracle Application', 'Orbiter', 'PBrowse', 'PEval', 'PSurf15a', 'Page Analyzer', 'Page Valet/', 'Pagebull', 'PagmIEDownload', 'Panopta v', 'PayPal IPN', 'Peew', 'Perman Surfer', 'Pingdom', 'Pingoscope', 'Pita', 'Pizilla', 'Ploetz \\+ Zeller', 'Plukkie', 'Pockey7', 'Pogodak', 'Poirot', 'Pompos', 'Port Huron Labs', 'PostFavorites', 'PostPost', 'Powermarks', 'Project XP5', 'PureSight', 'PuxaRapido', 'PycURL', 'QXW03018', 'Qango\\.com Web Directory', 'Qseero', 'QuepasaCreep', 'Qwantify', 'REL Link Checker', 'RMA/1\\.0', 'RSurf15a', 'Radian6', 'RankSonicSiteAuditor/', 'ReGet', 'RetrevoPageAnalyzer', 'Riddler', 'Rival IQ', 'RoboPal', 'Robosourcer', 'SBIder', 'SEOCentro', 'SEOstats', 'SSurf15a', 'Scooter', 'ScoutAbout', 'ScoutJet', 'Scrapy', 'Scrubby', 'SearchSight', 'Seeker\\.lookseek\\.com', 'Seznam screenshot-generator', 
'Shagseeker', 'ShopWiki', 'Siigle Orumcex', 'SimplyFast\\.info', 'Simpy', 'Site Server', 'Site24x7', 'SiteBar', 'SiteCondor', 'SiteRecon', 'SiteSnagger', 'SiteUptime\\.com', 'SiteXpert', 'SkypeUriPreview', 'Snappy', 'Sphere Scout', 'Sphider', 'SquidClamAV_Redirector', 'Sqworm', 'StackRambler', 'StatusCake', 'SuperCleaner', 'SurfMaster', 'SurferF3', 'T-H-U-N-D-E-R-S-T-O-N-E', 'TSurf15a', 'Tagword', 'Talkro Web-Shot', 'TargetSeek', 'Teleport Pro', 'Teradex Mapper', 'Theophrastus', 'TinEye', 'Twingly Recon', 'Twotrees Reactive Filter', 'TygoProwler', 'Ultraseek', 'Under the Rainbow', 'UnwindFetchor', 'UofTDB_experiment', 'User-Agent: ', 'VYU2', 'Vagabondo', 'Version: xxxx Type:xx', 'Vivante Link Checker', 'Vonna\\.com b o t\\', 'Vortex', 'WFARC', 'WSN Links', 'Wappalyzer', 'Watchfire WebXM', 'Waypath Scout', 'WeSEE:Search', 'Web Snooper', 'WebCompass', 'WebPix', 'WebVac', 'Webclipping\\.com', 'Webglimpse', 'Weblog Attitude Diffusion', 'Website Explorer', 'Websnapr/', 'Websquash\\.com', 'Webster v0\\.', 'Webverzeichnis\\.de', 'WhizBang! 
Lab', 'Whizbang', 'Wildsoft Surfer', 'WinGet', 'WinHTTP', 'WoW Lemmings Kathune', 'WomlpeFactory', 'WordPress\\.com mShots', 'WorldLight', 'XML Sitemaps Generator', 'Xenu Link Sleuth', 'Xenu\'s Link Sleuth', 'Xylix', 'Y!J-ASR', 'YandeG', 'YandexImages', 'YandexMetrika', 'Yoleo', 'Yoono', 'Zao', 'Zearchit', 'Zippy', 'ZnajdzFoto/Image', 'ZyBorg', '\\(privoxy/', '^ng/', 'aboutthedomain', 'accoona', 'acoon', 'adbeat\\.com', 'agada.de', 'agadine/', 'aiderss/', 'airmail\\.etn', 'airmail\\net', 'aladin/', 'alexa site audit', 'allrati/', 'alyze\\.info', 'amzn_assoc', 'appie', 'arachnode\\.net', 'araneo/', 'archive-com', 'asafaweb\\.com', 'asahina-antenna/', 'ask[-\\s]?jeeves', 'ask\\.24x\\.info', 'aspseek/', 'assort/', 'asterias/', 'atomic_email_hunter/', 'atomz/', 'augurfind', 'augurnfind', 'avsearch-3\\.0\\(altavista/avc\\)', 'beammachine/', 'beebwaredirectory/v0\\.01', 'bibnum\\.bnf', 'bigbrother/', 'biglotron', 'bilbo/', 'binlar', 'blaiz-bee/', 'bloglines-images/', 'bloglines/', 'blogsearch/', 'blogzice/', 'bobby/', 'boitho\\.com-dc', 'bookdog/x\\.x', 'bookmarkbase\\(2/;http://bookmarkbase\\.com\\)', 'bpimagewalker/', 'bsdseek/', 'btwebclient/', 'bumblebee@relevare\\.com', 'bwh3_user_agent', 'calif/', 'carleson/', 'ccubee/x\\.x', 'cfetch/', 'cg-eye interactive', 'changedetection', 'charon/', 'checklinks/', 'cloakdetect/', 'cnet\\.com', 'coccoc', 'cocoal\\.icio\\.us/', 'collage\\.cgi/', 'combine/', 'combine/x\\.0', 'contenttabreceiver', 'convera', 'copperegg/revealuptime/fremontca', 'coralwebprx/', 'cosmos', 'cougarsearch/', 'crowsnest/', 'csci_b659/', 'curl', 'cuwhois/', 'datacha0s/', 'datafountains/dmoz', 'dataprovider', 'dbdig\\(http://www\\.prairielandconsulting\\.com\\)', 'dc-sakura/x\\.xx', 'deepak-usc/isi', 'del\\.icio\\.us-thumbnails/', 'delegate/', 'diagem/', 'diamond/x\\.0', 'dlman', 'dlvr\\.it/', 'docomo/', 'drupact', 'e-sense', 'easydl/', 'ec2linkfinder', 'ecairn\\.com/grabber', 'echo!/', 'efp@gmx\\.net', 'egothor/', 'ejupiter\\.com', 'enterprise_search/', 
'envolk', 'europarchive\\.org', 'eventax/', 'exactseek\\.com', 'ezooms', 'facebookexternalhit', 'faedit/', 'favcollector/', 'feeltiptop\\.com', 'fileboost\\.net/', 'filtrbox/', 'findlink', 'findthatfile', 'firefly/', 'flexum/', 'fluffy', 'flunky', 'focusedsampler/', 'forensiq\\.com', 'francis/', 'freshdownload/x\\.xx', 'g00g1e\\.net', 'galaxy\\.com', 'gazz/x\\.x', 'geek-tools\\.org', 'genderanalyzer', 'genieknows', 'geourl/', 'getright/', 'getrightpro/', 'ghostroutehunter/', 'gigabaz/', 'go!zilla/', 'go-ahead-got-it/', 'goblin/', 'gonzo1', 'gonzo2', 'gooblog/', 'goofer/', 'gossamer-threads\\.com', 'grapefx/', 'gromit/', 'grub-client', 'gulliver/', 'harvest-ng/', 'haste/', 'hatenascreenshot/', 'helix/', 'heritrix', 'hippias/', 'hotmail.com', 'htdig', 'htmlparser/', 'http-header-abfrage/', 'http://anonymouse\\.org/', 'http://ask\\.24x\\.info/', 'http://ozysoftware\\.com/index\\.html', 'http://www\\.ip2location\\.com', 'http://www\\.monogol\\.de', 'http://www\\.sygol\\.com', 'http://www\\.timelyweb\\.com/', 'http::lite/', 'httpunit', 'httrack', 'hyperestraier/', 'iZSearch', 'ichiro', 'ics \\d', 'ideare - SignSite', 'idwhois\\.info', 'iframely/', 'igdeSpyder', 'igetter/', 'iltrovatore-setaccio/', 'imageengine/', 'imagewalker/', 'incywincy\\(http://www\\.look\\.com\\)', 'info@pubblisito\\.com', 'infofly/', 'infolink/', 'infomine/', 'inkpeek\\.com', 'inspectorwww/', 'integrity/', 'integromedb', 'intelix/', 'intelliseek\\.com', 'internetlinkagent/', 'ips-agent', 'iqdb/', 'iria/', 'irvine/', 'isitup\\.org', 'isurf', 'ivia/', 'iwagent/', 'j-phone/', 'jchecklinks/', 'jigsaw/', 'kalooga/kalooga-4\\.0-dev-datahouse', 'kasparek@naparek\\.cz', 'ke_1\\.0/', 'kit-fireball/', 'knowledge\\.com/', 'kulturarw3/', 'kummhttp/', 'labrador/', 'lecodechecker/', 'leia/', 'libweb/clshttp', 'lightningdownload/', 'linkalarm/', 'linkdex', 'linklint-checkonly/', 'linkscan/', 'linkscan/x\\.x', 'linksonar/', 'linksweeper/', 'loadimpactrload/', 'ltx71', 'lwp-', 'lwp-trivial', 'lwp::', 'mabontland', 
'magicwml/', 'mail\\.ru/', 'mammoth/', 'masagool/', 'mediasearch/', 'metainspector/', 'metaspinner/', 'metatagsdir/', 'miixpc/', 'miniflux\\.net', 'moget/x\\.x', 'mogimogi', 'moiNAG', 'monkeyagent', 'msnptc/', 'msproxy/', 'multiBlocker browser', 'multitext/', 'mygetright/', 'naofavicon4ie/', 'naparek\\.cz\\', 'nationaldirectoryaddurl/', 'netants/', 'netlookout/', 'netluchs/', 'netpumper/x\\.xx', 'netresearchserver', 'netwu\\.com', 'neutrinoapi/', 'newt', 'nico/', 'noyona_0_1', 'nsauditor/', 'nutch', 'ocelli/', 'oegp', 'online link validator', 'openisearch/', 'ow\\.ly', 'ownCloud News/', 'ozelot/', 'page2rss', 'panscient', 'parasite/', 'pavuk/', 'pd02_1', 'photon/', 'picosearch/', 'pingspot/', 'pinterest/', 'pockey-gethtml/', 'pockey-gethtml/x\\.xx', 'pockey/x\\.x\\.x', 'popdexter/', 'postrank', 'pricepi\\.com', 'privacyfinder/', 'prlog\\.ru', 'pro-sitemaps\\.com', 'protopage/', 'proximic', 'psycheclone', 'puf/', 'rabaz', 'readability/', 'realdownload/', 'reaper/', 'responsecodetest/', 'rico/', 'robozilla/', 'rotondo/', 'rpt-httpclient/', 'samualt9', 'saucenao/', 'scooter/', 'scoutant/', 'scoutmaster', 'searchguild/dmoz/experiment', 'semaforo\\.net', 'semager/', 'semanticdiscovery', 'seo-nastroj\\.cz', 'siteexplorer\\.info', 'sitesucker/', 'sitexy\\.com', 'skampy/', 'skimpy/', 'skywalker/', 'slarp/', 'slider\\.com', 'slurp', 'slysearch/', 'smartdownload/', 'smartwit\\.com', 'sogou', 'sohu agent', 'speeddownload/', 'speedy', 'speng', 'spida/', 'spinne/', 'squidclam', 'stamina/', 'suchbaer\\.de', 'summify', 'suzuran', 'synapse', 'syncit/x\\.x', 'szukacz/', 'tags2dir\\.com/', 'targetblaster\\.com/', 'teoma', 'therarestparser/', 'tkensaku/x\\.x\\(http://www\\.tkensaku\\.com/q\\.html\\)', 'truwoGPS', 'tuezilla', 'tumblr/', 'unknownght\\.com', 'updated', 'uri::fetch/', 'urlbase/', 'urlresolver', 'user-agent=mozilla/', 'utorrent/', 'vakes/', 'vb wininet', 'versus', 'verzamelgids/', 'viking', 'vkshare', 'voltron', 'vonna', 'voyager-hc/', 'w3c-webcon/', 'w3dt\\.net', 
'wavefire/', 'wbsrch\\.com', 'wdg_validator/', 'web-bekannt', 'webauto/', 'webbandit/', 'webbug/', 'webcollage', 'webcookies', 'webcorp/', 'webcraft', 'webfetch/', 'webgobbler/', 'weblight/', 'webmastercoffee/', 'webminer/x\\.x', 'webmon ', 'websearch\\.com\\.au/', 'webspinne/', 'webstat/', 'webstripper/', 'webtrafficexpress/x\\.0', 'webtrends/', 'webval/', 'webwatchermonitor/', 'wf84', 'wget', 'whatsapp', 'whatsmyip\\.org', 'whatsup/x\\.x', 'whatuseek_winona/', 'whoami', 'whoiam', 'wish-project', 'worqmada/', 'wotbox', 'wusage/x\\.0@boutell\\.com', 'wwlib/linux', 'www-mechanize/', 'www\\.ackerm\\.com', 'www\\.alertra\\.com', 'www\\.arianna\\.it', 'www\\.ba\\.be', 'www\\.de\\.com', 'www\\.evri\\.com/evrinid', 'www\\.gozilla\\.com\\', 'www\\.idealobserver\\.com', 'www\\.iltrovatore\\.it', 'www\\.iskanie\\.com', 'www\\.kosmix\\.com', 'www\\.megaproxy\\.com', 'www\\.moreover\\.com', 'www\\.mowser\\.com', 'www\\.nearsoftware\\.com', 'www\\.ssllabs\\.com', 'wwwc/', 'wwwoffle/', 'wwwster/', 'wxDownload Fast', 'xirq/', 'xrl/', 'y!j-srd/', 'y!oasis/test', 'yacy', 'yandex', 'yanga', 'yarienavoir\\.net/', 'yeti', 'yoogliFetchAgent', 'youtube-dl', 'zedzo\\.digest/', 'zeus', 'developers\\.google\\.com\\/\\+\\/web\\/snippet\\/', //hz 'http://www.baidu.com/search/spider.html', 'YisouSpider', 'Sogou web spider', 'DNSPod-Monitor', 'bingbot', 'Scrapy', 'Googlebot', 'Nutch', 'DotBot', 'YunaqMonitor', 'AhrefsBot', '360Spider', 'Baiduspider', 'Yahoo! 
Slurp', 'CFNetwork', 'MJ12bot', 'Baidu-YunGuanCe-SLABot', 'Jakarta Commons-HttpClient', 'Java', 'ltx71', 'YandexBot', 'Python-urllib', 'Wget', 'NuSOAP', 'Apache-HttpClient', 'Sogou Pic Spider', 'Exabot', 'Baiduspider-image', 'Go 1.1 package http', 'WinHTTP', 'Go-http-client', 'EmbeddedWB', 'Googlebot-Image', 'istellabot', 'PHP', 'AdsBot-Google-Mobile', 'Mediapartners-Google', 'JikeSpider', 'larbin', 'baidu Transcoder', 'beegoServer', 'JianKongBao Monitor', 'node-fetch', 'Riddler', 'psbot', 'Microsoft URL Control', 'spider-ads', 'spiderman', 'YRSpider', 'msnbot' ];