isbot
Version:
🤖/👨‍🦰 Recognise bots/crawlers/spiders using the user agent string.
230 lines (227 loc) • 6.29 kB
JavaScript
// src/patterns.json
/**
 * Regular-expression source fragments used to recognise bot user agents.
 *
 * - Every entry is a plain string holding regex *source*; backslashes are
 *   doubled because they live inside JavaScript string literals.
 * - Entries are compiled individually with the "i" flag by the helpers that
 *   iterate over `list` (an alias of this array), so each fragment must be a
 *   valid stand-alone pattern.
 * - Several entries use lookbehind assertions (`(?<!...)`), which require a
 *   regex engine that supports them.
 */
var patterns_default = [
" daum[ /]",
" deusu/",
"(?:^|[^g])news(?!sapphire)",
"(?<! (?:channel/|google/))google(?!(app|/google| pixel))",
"(?<! cu)bots?(?:\\b|_)",
"(?<!(?:lib))http",
"(?<![hg]m)score",
"(?<!cam)scan",
"24x7",
"@[a-z][\\w-]+\\.",
"\\(\\)",
"\\.com\\b",
"\\btime/",
"\\|",
"^<",
"^[\\w \\.\\-\\(?:\\):%]+(?:/v?\\d+(?:\\.\\d+)?(?:\\.\\d{1,10})*?)?(?:,|$)",
"^[^ ]{50,}$",
"^\\d+\\b",
"^\\w*search\\b",
"^\\w+/[\\w\\(\\)]*$",
"^active",
"^ad muncher",
"^amaya",
"^avsdevicesdk/",
"^azure",
"^biglotron",
"^bot",
"^bw/",
"^clamav[ /]",
"^client/",
"^cobweb/",
"^custom",
"^ddg[_-]android",
"^discourse",
"^dispatch/\\d",
"^downcast/",
"^duckduckgo",
"^email",
"^facebook",
"^getright/",
"^gozilla/",
"^hobbit",
"^hotzonu",
"^hwcdn/",
"^igetter/",
"^jeode/",
"^jetty/",
"^jigsaw",
"^microsoft bits",
"^movabletype",
"^mozilla/\\d\\.\\d\\s[\\w\\.-]+$",
"^mozilla/\\d\\.\\d\\s\\(compatible;?(?:\\s\\w+\\/\\d+\\.\\d+)?\\)$",
"^navermailapp",
"^netsurf",
"^offline",
"^openai/",
"^owler",
"^php",
"^postman",
"^python",
"^rank",
"^read",
"^reed",
"^rest",
"^rss",
"^snapchat",
"^space bison",
"^svn",
"^swcd ",
"^taringa",
"^thumbor/",
"^track",
"^w3c",
"^webbandit/",
"^webcopier",
"^wget",
"^whatsapp",
"^wordpress",
"^xenu link sleuth",
"^yahoo",
"^yandex",
"^zdm/\\d",
"^zoom marketplace/",
"agent",
"analyzer",
"archive",
"ask jeeves/teoma",
"audit",
"bit\\.ly/",
"bluecoat drtr",
"browsex",
"burpcollaborator",
"capture",
"catch",
"check\\b",
"checker",
"chrome-lighthouse",
"chromeframe",
"classifier",
"cloudflare",
"convertify",
"crawl",
"cypress/",
"dareboost",
"datanyze",
"dejaclick",
"detect",
"dmbrowser",
"download",
"evc-batch/",
"exaleadcloudview",
"feed",
"fetcher",
"firephp",
"functionize",
"grab",
"headless",
"httrack",
"hubspot marketing grader",
"hydra",
"ibisbrowser",
"infrawatch",
"insight",
"inspect",
"iplabel",
"java(?!;)",
"library",
"linkcheck",
"mail\\.ru/",
"manager",
"measure",
"neustar wpm",
"node",
"nutch",
"offbyone",
"onetrust",
"optimize",
"pageburst",
"pagespeed",
"parser",
"perl",
"phantomjs",
"pingdom",
"powermarks",
"preview",
"proxy",
"ptst[ /]\\d",
"retriever",
"rexx;",
"rigor",
"rss\\b",
"scrape",
"server",
"sogou",
"sparkler/",
"speedcurve",
"spider",
"splash",
"statuscake",
"supercleaner",
"synapse",
"synthetic",
"tools",
"torrent",
"transcoder",
"url",
"validator",
"virtuoso",
"wappalyzer",
"webglance",
"webkit2png",
"whatcms/",
"xtate/"
];
// src/pattern.ts
/**
 * Source of the combined bot-detection expression: one long alternation that
 * getPattern() compiles (once) with the "i" flag.
 *
 * NOTE(review): this appears to be the entries of `patterns_default` joined
 * with "|" and inlined as a literal - presumably by the build step. If either
 * one is edited by hand, confirm the two stay in sync.
 */
var fullPattern = " daum[ /]| deusu/|(?:^|[^g])news(?!sapphire)|(?<! (?:channel/|google/))google(?!(app|/google| pixel))|(?<! cu)bots?(?:\\b|_)|(?<!(?:lib))http|(?<![hg]m)score|(?<!cam)scan|24x7|@[a-z][\\w-]+\\.|\\(\\)|\\.com\\b|\\btime/|\\||^<|^[\\w \\.\\-\\(?:\\):%]+(?:/v?\\d+(?:\\.\\d+)?(?:\\.\\d{1,10})*?)?(?:,|$)|^[^ ]{50,}$|^\\d+\\b|^\\w*search\\b|^\\w+/[\\w\\(\\)]*$|^active|^ad muncher|^amaya|^avsdevicesdk/|^azure|^biglotron|^bot|^bw/|^clamav[ /]|^client/|^cobweb/|^custom|^ddg[_-]android|^discourse|^dispatch/\\d|^downcast/|^duckduckgo|^email|^facebook|^getright/|^gozilla/|^hobbit|^hotzonu|^hwcdn/|^igetter/|^jeode/|^jetty/|^jigsaw|^microsoft bits|^movabletype|^mozilla/\\d\\.\\d\\s[\\w\\.-]+$|^mozilla/\\d\\.\\d\\s\\(compatible;?(?:\\s\\w+\\/\\d+\\.\\d+)?\\)$|^navermailapp|^netsurf|^offline|^openai/|^owler|^php|^postman|^python|^rank|^read|^reed|^rest|^rss|^snapchat|^space bison|^svn|^swcd |^taringa|^thumbor/|^track|^w3c|^webbandit/|^webcopier|^wget|^whatsapp|^wordpress|^xenu link sleuth|^yahoo|^yandex|^zdm/\\d|^zoom marketplace/|agent|analyzer|archive|ask jeeves/teoma|audit|bit\\.ly/|bluecoat drtr|browsex|burpcollaborator|capture|catch|check\\b|checker|chrome-lighthouse|chromeframe|classifier|cloudflare|convertify|crawl|cypress/|dareboost|datanyze|dejaclick|detect|dmbrowser|download|evc-batch/|exaleadcloudview|feed|fetcher|firephp|functionize|grab|headless|httrack|hubspot marketing grader|hydra|ibisbrowser|infrawatch|insight|inspect|iplabel|java(?!;)|library|linkcheck|mail\\.ru/|manager|measure|neustar wpm|node|nutch|offbyone|onetrust|optimize|pageburst|pagespeed|parser|perl|phantomjs|pingdom|powermarks|preview|proxy|ptst[ /]\\d|retriever|rexx;|rigor|rss\\b|scrape|server|sogou|sparkler/|speedcurve|spider|splash|statuscake|supercleaner|synapse|synthetic|tools|torrent|transcoder|url|validator|virtuoso|wappalyzer|webglance|webkit2png|whatcms/|xtate/";
// src/index.ts
/**
 * Small, conservative expression used by isbotNaive() and as the fallback
 * when the full pattern cannot be compiled.
 */
var naivePattern = /bot|crawl|http|lighthouse|scan|search|spider/i;

/** Memoised result of getPattern(); stays undefined until the first call. */
var pattern;

/**
 * Return the regular expression used for bot detection, compiling it lazily.
 *
 * The first call builds a case-insensitive RegExp from `fullPattern`. If the
 * RegExp constructor throws (presumably on engines that lack a feature the
 * full pattern relies on, such as lookbehind), `naivePattern` is used instead.
 * Either way the outcome is cached and returned by every later call.
 *
 * @returns {RegExp} The cached detection pattern.
 */
function getPattern() {
  const alreadyCompiled = pattern instanceof RegExp;
  if (!alreadyCompiled) {
    try {
      pattern = new RegExp(fullPattern, "i");
    } catch (compileError) {
      // Deliberate best-effort: degrade to the simple pattern rather than fail.
      pattern = naivePattern;
    }
  }
  return pattern;
}
// Alias of the pattern-source array. It is exported as `list`, and the
// isbotMatches / isbotPattern / isbotPatterns helpers iterate over it.
var list = patterns_default;
/**
 * Quick bot check that uses only the small `naivePattern` expression.
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {boolean} `false` for empty/missing input, otherwise whether the
 *   naive pattern matches.
 */
var isbotNaive = (userAgent) => {
  if (!userAgent) {
    return false;
  }
  return naivePattern.test(userAgent);
};
/**
 * Tell whether a user agent string looks like a bot, using the full pattern
 * returned by getPattern().
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {boolean} `false` for empty/missing input (the pattern is not even
 *   requested in that case), otherwise the result of the pattern test.
 */
function isbot(userAgent) {
  if (!userAgent) {
    return false;
  }
  const detector = getPattern();
  return detector.test(userAgent);
}
/**
 * Build a bot-detection predicate backed by a caller-supplied expression.
 *
 * @param {RegExp} customPattern - Expression to test user agents against.
 * @returns {(userAgent: string|null|undefined) => boolean} Predicate that
 *   returns `false` for empty/missing input and otherwise the result of
 *   `customPattern.test(userAgent)`.
 */
var createIsbot = (customPattern) => (userAgent) => {
  if (!userAgent) {
    return false;
  }
  // A global or sticky RegExp remembers `lastIndex` between .test() calls, so
  // reusing it unchanged makes the predicate alternate between true and false
  // for the same input. Always start matching from the beginning.
  if (customPattern.global || customPattern.sticky) {
    customPattern.lastIndex = 0;
  }
  return customPattern.test(userAgent);
};
/**
 * Build a bot-detection predicate from a list of regex source strings.
 *
 * The entries are joined with "|" and compiled once, case-insensitively.
 *
 * @param {string[]} list2 - Regex source fragments (for example a filtered
 *   copy of the exported `list`).
 * @returns {(userAgent: string|null|undefined) => boolean} Predicate that
 *   returns `false` for empty/missing input and otherwise whether any
 *   fragment matches.
 * @throws {SyntaxError} If the joined source is not a valid regular expression.
 */
var createIsbotFromList = (list2) => {
  // Joining an empty list yields "", and an empty RegExp matches every string,
  // which would classify every user agent as a bot. No patterns means no match.
  if (list2.length === 0) {
    return () => false;
  }
  const pattern2 = new RegExp(list2.join("|"), "i");
  return (userAgent) => Boolean(userAgent) && pattern2.test(userAgent);
};
/**
 * Return the substring of the user agent that the full pattern matched.
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {string|null} The matched text, or `null` when the input is
 *   null/undefined or nothing matched.
 */
var isbotMatch = (userAgent) => {
  if (userAgent == null) {
    return null;
  }
  const found = userAgent.match(getPattern());
  if (found == null) {
    return null;
  }
  const hit = found[0];
  return hit != null ? hit : null;
};
/**
 * Collect the matched substring for every entry of `list` that matches.
 *
 * Each entry is compiled on its own with the "i" flag; results keep the order
 * of `list`, and empty matches are dropped.
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {string[]} Matched substrings (empty when the input is
 *   null/undefined or nothing matched).
 */
var isbotMatches = (userAgent) => {
  const hits = [];
  if (userAgent == null) {
    return hits;
  }
  for (const source of list) {
    const found = userAgent.match(new RegExp(source, "i"));
    if (found && found[0]) {
      hits.push(found[0]);
    }
  }
  return hits;
};
/**
 * Find the first entry of `list` whose expression matches the user agent.
 *
 * Each entry is compiled on its own with the "i" flag and tried in list order.
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {string|null} The matching pattern source, or `null` when the
 *   input is empty/missing or no entry matches.
 */
var isbotPattern = (userAgent) => {
  if (!userAgent) {
    return null;
  }
  for (const candidate of list) {
    if (new RegExp(candidate, "i").test(userAgent)) {
      return candidate != null ? candidate : null;
    }
  }
  return null;
};
/**
 * List every entry of `list` whose expression matches the user agent.
 *
 * Each entry is compiled on its own with the "i" flag; results keep the order
 * of `list`.
 *
 * @param {string|null|undefined} userAgent - User agent string to inspect.
 * @returns {string[]} Matching pattern sources (a fresh empty array when the
 *   input is empty/missing).
 */
var isbotPatterns = (userAgent) => {
  if (!userAgent) {
    return [];
  }
  const matchesUserAgent = (source) => new RegExp(source, "i").test(userAgent);
  return list.filter(matchesUserAgent);
};
export {
createIsbot,
createIsbotFromList,
getPattern,
isbot,
isbotMatch,
isbotMatches,
isbotNaive,
isbotPattern,
isbotPatterns,
list
};