UNPKG

isbot

Version:

🤖/👨‍🦰 Recognise bots/crawlers/spiders using the user agent string.

230 lines (227 loc) 6.29 kB
// src/patterns.json
/**
 * Regular-expression source fragments. Each entry is compiled with the
 * case-insensitive flag; a user agent string is treated as a bot when at
 * least one fragment matches it.
 */
const patterns_default = [
  " daum[ /]",
  " deusu/",
  "(?:^|[^g])news(?!sapphire)",
  "(?<! (?:channel/|google/))google(?!(app|/google| pixel))",
  "(?<! cu)bots?(?:\\b|_)",
  "(?<!(?:lib))http",
  "(?<![hg]m)score",
  "(?<!cam)scan",
  "24x7",
  "@[a-z][\\w-]+\\.",
  "\\(\\)",
  "\\.com\\b",
  "\\btime/",
  "\\|",
  "^<",
  "^[\\w \\.\\-\\(?:\\):%]+(?:/v?\\d+(?:\\.\\d+)?(?:\\.\\d{1,10})*?)?(?:,|$)",
  "^[^ ]{50,}$",
  "^\\d+\\b",
  "^\\w*search\\b",
  "^\\w+/[\\w\\(\\)]*$",
  "^active",
  "^ad muncher",
  "^amaya",
  "^avsdevicesdk/",
  "^azure",
  "^biglotron",
  "^bot",
  "^bw/",
  "^clamav[ /]",
  "^client/",
  "^cobweb/",
  "^custom",
  "^ddg[_-]android",
  "^discourse",
  "^dispatch/\\d",
  "^downcast/",
  "^duckduckgo",
  "^email",
  "^facebook",
  "^getright/",
  "^gozilla/",
  "^hobbit",
  "^hotzonu",
  "^hwcdn/",
  "^igetter/",
  "^jeode/",
  "^jetty/",
  "^jigsaw",
  "^microsoft bits",
  "^movabletype",
  "^mozilla/\\d\\.\\d\\s[\\w\\.-]+$",
  "^mozilla/\\d\\.\\d\\s\\(compatible;?(?:\\s\\w+\\/\\d+\\.\\d+)?\\)$",
  "^navermailapp",
  "^netsurf",
  "^offline",
  "^openai/",
  "^owler",
  "^php",
  "^postman",
  "^python",
  "^rank",
  "^read",
  "^reed",
  "^rest",
  "^rss",
  "^snapchat",
  "^space bison",
  "^svn",
  "^swcd ",
  "^taringa",
  "^thumbor/",
  "^track",
  "^w3c",
  "^webbandit/",
  "^webcopier",
  "^wget",
  "^whatsapp",
  "^wordpress",
  "^xenu link sleuth",
  "^yahoo",
  "^yandex",
  "^zdm/\\d",
  "^zoom marketplace/",
  "agent",
  "analyzer",
  "archive",
  "ask jeeves/teoma",
  "audit",
  "bit\\.ly/",
  "bluecoat drtr",
  "browsex",
  "burpcollaborator",
  "capture",
  "catch",
  "check\\b",
  "checker",
  "chrome-lighthouse",
  "chromeframe",
  "classifier",
  "cloudflare",
  "convertify",
  "crawl",
  "cypress/",
  "dareboost",
  "datanyze",
  "dejaclick",
  "detect",
  "dmbrowser",
  "download",
  "evc-batch/",
  "exaleadcloudview",
  "feed",
  "fetcher",
  "firephp",
  "functionize",
  "grab",
  "headless",
  "httrack",
  "hubspot marketing grader",
  "hydra",
  "ibisbrowser",
  "infrawatch",
  "insight",
  "inspect",
  "iplabel",
  "java(?!;)",
  "library",
  "linkcheck",
  "mail\\.ru/",
  "manager",
  "measure",
  "neustar wpm",
  "node",
  "nutch",
  "offbyone",
  "onetrust",
  "optimize",
  "pageburst",
  "pagespeed",
  "parser",
  "perl",
  "phantomjs",
  "pingdom",
  "powermarks",
  "preview",
  "proxy",
  "ptst[ /]\\d",
  "retriever",
  "rexx;",
  "rigor",
  "rss\\b",
  "scrape",
  "server",
  "sogou",
  "sparkler/",
  "speedcurve",
  "spider",
  "splash",
  "statuscake",
  "supercleaner",
  "synapse",
  "synthetic",
  "tools",
  "torrent",
  "transcoder",
  "url",
  "validator",
  "virtuoso",
  "wappalyzer",
  "webglance",
  "webkit2png",
  "whatcms/",
  "xtate/",
];

// src/pattern.ts
// Single alternation of every fragment above. Derived from the list (instead
// of a second hand-maintained literal) so the two can never drift apart.
const fullPattern = patterns_default.join("|");

// src/index.ts
/** Small pattern without lookbehind; used when `fullPattern` cannot be compiled. */
const naivePattern = /bot|crawl|http|lighthouse|scan|search|spider/i;

/** Lazily compiled result of `getPattern()`; undefined until the first call. */
let pattern;

/**
 * Returns the regular expression used by `isbot`, compiling it on first use.
 *
 * @returns {RegExp} The compiled full pattern, or `naivePattern` when
 *   constructing the full pattern throws.
 */
function getPattern() {
  if (pattern instanceof RegExp) {
    return pattern;
  }
  try {
    pattern = new RegExp(fullPattern, "i");
  } catch (error) {
    // Deliberate best-effort fallback: `fullPattern` contains lookbehind
    // assertions, and a runtime that rejects them still gets a usable regex.
    pattern = naivePattern;
  }
  return pattern;
}

/** The list of pattern fragments, as exported to consumers. */
const list = patterns_default;

// Cache of compiled single-fragment expressions, keyed by the fragment itself.
// Keying by value (not by index) keeps results correct if `list` is changed.
const compiledParts = new Map();

/**
 * Compiles one fragment case-insensitively, reusing an earlier compilation.
 * The regexes carry no `g`/`y` flag, so sharing them is stateless.
 */
function compilePart(part) {
  let compiled = compiledParts.get(part);
  if (compiled === undefined) {
    compiled = new RegExp(part, "i");
    compiledParts.set(part, compiled);
  }
  return compiled;
}

/**
 * Tests a user agent against `naivePattern` only.
 *
 * @param {string|null|undefined} userAgent
 * @returns {boolean} `false` for falsy input, otherwise whether it matches.
 */
const isbotNaive = (userAgent) =>
  Boolean(userAgent) && naivePattern.test(userAgent);

/**
 * Tests a user agent against the pattern returned by `getPattern()`.
 *
 * @param {string|null|undefined} userAgent
 * @returns {boolean} `false` for falsy input, otherwise whether it matches.
 */
function isbot(userAgent) {
  return Boolean(userAgent) && getPattern().test(userAgent);
}

/**
 * Builds a predicate that tests user agents against a caller-supplied regex.
 *
 * @param {RegExp} customPattern
 * @returns {(userAgent: string|null|undefined) => boolean}
 */
const createIsbot = (customPattern) => (userAgent) =>
  Boolean(userAgent) && customPattern.test(userAgent);

/**
 * Builds a predicate from a list of fragments, joined with `|` and compiled
 * case-insensitively once, when the predicate is created.
 *
 * @param {string[]} list2 Pattern fragments.
 * @returns {(userAgent: string|null|undefined) => boolean}
 */
const createIsbotFromList = (list2) => {
  const combined = new RegExp(list2.join("|"), "i");
  return (userAgent) => Boolean(userAgent) && combined.test(userAgent);
};

/**
 * Returns the substring of the user agent matched by `getPattern()`.
 *
 * @param {string|null|undefined} userAgent
 * @returns {string|null} The matched text, or `null` when the input is
 *   null/undefined or nothing matches.
 */
const isbotMatch = (userAgent) => {
  if (userAgent == null) {
    return null;
  }
  const match = userAgent.match(getPattern());
  const matched = match == null ? undefined : match[0];
  return matched != null ? matched : null;
};

/**
 * Returns the text matched by each fragment in `list`, in list order,
 * with non-matching (and empty) results removed.
 *
 * @param {string|null|undefined} userAgent
 * @returns {string[]} Empty when the input is null/undefined or nothing matches.
 */
const isbotMatches = (userAgent) =>
  list
    .map((part) => {
      if (userAgent == null) {
        return undefined;
      }
      const match = userAgent.match(compilePart(part));
      return match == null ? undefined : match[0];
    })
    .filter(Boolean);

/**
 * Returns the first fragment in `list` that matches the user agent.
 *
 * @param {string|null|undefined} userAgent
 * @returns {string|null} The fragment, or `null` for falsy input or no match.
 */
const isbotPattern = (userAgent) => {
  if (!userAgent) {
    return null;
  }
  const found = list.find((part) => compilePart(part).test(userAgent));
  return found != null ? found : null;
};

/**
 * Returns every fragment in `list` that matches the user agent, in list order.
 *
 * @param {string|null|undefined} userAgent
 * @returns {string[]} Empty for falsy input or when nothing matches.
 */
const isbotPatterns = (userAgent) => {
  if (!userAgent) {
    return [];
  }
  return list.filter((part) => compilePart(part).test(userAgent));
};

export {
  createIsbot,
  createIsbotFromList,
  getPattern,
  isbot,
  isbotMatch,
  isbotMatches,
  isbotNaive,
  isbotPattern,
  isbotPatterns,
  list,
};