es6-crawler-detect
Version:
This is an ES6 adaptation of the original PHP library CrawlerDetect. This library helps you detect bots/crawlers/spiders via the user agent.
98 lines • 3.61 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Crawler = void 0;
const crawlers_1 = require("./crawler/crawlers");
const exclusions_1 = require("./crawler/exclusions");
const headers_1 = require("./crawler/headers");
/**
 * Detects bots/crawlers/spiders by testing a user-agent string against one
 * combined crawler regex, after stripping a combined list of exclusion
 * patterns from it.
 */
class Crawler {
    crawlers;
    headers;
    exclusions;
    request;
    compiledRegexList;
    compiledExclusions;
    httpHeaders;
    userAgent;
    matches;
    /**
     * @param {object} [request] - Request-like object; only its `headers`
     *   property is read. Defaults to an empty object.
     * @param {object} [headers] - Explicit header map. When missing or empty,
     *   `request.headers` is used instead.
     * @param {string} [userAgent] - Explicit user agent. When missing or
     *   empty, it is assembled from the HTTP headers.
     */
    constructor(request, headers, userAgent) {
        this.crawlers = new crawlers_1.Crawlers();
        this.headers = new headers_1.Headers();
        this.exclusions = new exclusions_1.Exclusions();
        this.request = request ?? {};
        // The regex-list must not be used with g-flag!
        // See: https://stackoverflow.com/questions/1520800/why-does-a-regexp-with-global-flag-give-wrong-results
        this.compiledRegexList = this.compileRegex(this.crawlers.getAll(), 'i');
        // The exclusions should be used with g-flag in order to remove each value.
        this.compiledExclusions = this.compileRegex(this.exclusions.getAll(), 'gi');
        // Order matters: setUserAgent() reads this.httpHeaders.
        this.httpHeaders = this.setHttpHeaders(headers);
        this.userAgent = this.setUserAgent(userAgent);
    }
    /**
     * Combines several regex sources into a single alternation.
     *
     * @param {string[]} patterns - Regex source fragments, joined with `|`.
     * @param {string} flags - Flags passed to the `RegExp` constructor.
     * @returns {RegExp} The combined regular expression.
     */
    compileRegex(patterns, flags) {
        return new RegExp(patterns.join('|'), flags);
    }
    /**
     * Chooses the header map to use. Despite the name, this returns the
     * value rather than storing it; the constructor assigns the result.
     *
     * @param {object} [headers] - Explicit header map.
     * @returns {object|string} `headers` when it has at least one key,
     *   otherwise `request.headers` when truthy, otherwise `''`.
     */
    setHttpHeaders(headers) {
        if (headers && Object.keys(headers).length > 0) {
            return headers;
        }
        // Fall back to the headers of the request object, if it has any.
        return this.request.headers ? this.request.headers : '';
    }
    /**
     * Chooses the user agent to use. Despite the name, this returns the
     * value rather than storing it; the constructor assigns the result.
     *
     * @param {string} [userAgent] - Explicit user agent.
     * @returns {string} `userAgent` when non-empty; otherwise the values of
     *   all known user-agent headers found in `this.httpHeaders`, joined
     *   with single spaces (`''` when none are found).
     */
    setUserAgent(userAgent) {
        if (userAgent?.length) {
            return userAgent;
        }
        const httpHeaders = this.httpHeaders;
        // Only a plain (non-null, non-array) object can carry header values.
        if (typeof httpHeaders !== 'object' ||
            httpHeaders === null ||
            Array.isArray(httpHeaders)) {
            return '';
        }
        let assembled = '';
        for (const header of this.getUaHttpHeaders()) {
            // Header names are looked up in lower case only.
            const key = header.toLowerCase();
            if (!Object.hasOwn(httpHeaders, key)) {
                continue;
            }
            const headerValue = httpHeaders[key];
            let piece;
            if (typeof headerValue === 'string') {
                piece = headerValue;
            }
            else if (Array.isArray(headerValue)) {
                // Multi-valued headers are flattened with spaces.
                piece = headerValue.join(' ');
            }
            else {
                continue;
            }
            const separator = assembled.length > 0 ? ' ' : '';
            assembled += separator + piece;
        }
        return assembled;
    }
    /**
     * @returns {Iterable<string>} The header names that may carry a user
     *   agent, as provided by `this.headers.getAll()`.
     */
    getUaHttpHeaders() {
        return this.headers.getAll();
    }
    /**
     * Reports what the most recent `isCrawler()` call matched.
     *
     * @returns {string|null|object} The matched substring; `{}` when no
     *   match is recorded; `null` when `matches` is set but empty.
     */
    getMatches() {
        if (this.matches === undefined) {
            return {};
        }
        return this.matches?.length ? this.matches[0] : null;
    }
    /**
     * Tests whether a user agent belongs to a known crawler.
     *
     * @param {string} [userAgent] - User agent to test. When `null` or
     *   `undefined`, the instance's own `userAgent` is tested instead.
     * @returns {boolean} `true` when the agent, with exclusions removed,
     *   matches the crawler list; `false` otherwise, including when the
     *   agent is larger than 4096 bytes or is blank after exclusions.
     */
    isCrawler(userAgent) {
        // Forget the previous call's result so getMatches() never reports a
        // match that belongs to a different user agent.
        this.matches = undefined;
        const candidate = userAgent ?? this.userAgent;
        // Size guard for the string that is actually tested, including the
        // header-derived fallback, before it reaches the regexes.
        if (Buffer.byteLength(candidate, 'utf8') > 4096) {
            return false;
        }
        const agent = candidate.replace(this.compiledExclusions, '');
        if (agent.trim().length === 0) {
            return false;
        }
        const found = this.compiledRegexList.exec(agent);
        if (found) {
            this.matches = found;
        }
        return found !== null && found.length > 0;
    }
}
exports.Crawler = Crawler;
//# sourceMappingURL=crawler.js.map