/*
 * @fairfetch/fair-fetch
 * Version: (not recorded)
 * Protect your site from AI scrapers by adding invisible noise to your site
 * which confuses AI bots while keeping your site looking and functioning
 * normally for your human visitors.
 * 100 lines (99 loc) • 3.63 kB — JavaScript
 */
// googlebot-validator.js
// --------------------------------------------------
// npm install node-fetch ip-cidr ipaddr.js
import fetch from 'node-fetch';
import IPCIDR from 'ip-cidr';
import { createRequire } from 'module';
const require = createRequire(import.meta.url);
const ipaddr = require('ipaddr.js');
/**
 * Enum of supported crawler agents, in the TypeScript compiled-enum shape:
 * forward mapping (BotAgent.GoogleBot === 0) plus reverse mapping
 * (BotAgent[0] === 'GoogleBot').
 */
export var BotAgent;
(function (BotAgent) {
    const names = ['GoogleBot', 'BingBot', 'DuckDuckBot', 'Unknown'];
    names.forEach((name, value) => {
        BotAgent[name] = value;   // forward: name -> number
        BotAgent[value] = name;   // reverse: number -> name
    });
})(BotAgent || (BotAgent = {}));
// Published IP-range documents per crawler, fetched by loadIpRanges().
// Every URL returns JSON of the shape:
//   { prefixes: [{ ipv4Prefix?: string, ipv6Prefix?: string }, ...] }
const GOOGLE_JSON_URLS = {
// Googlebot proper (search crawler).
common: 'https://developers.google.com/search/apis/ipranges/googlebot.json',
// Special-case crawlers (AdsBot etc.).
special: 'https://developers.google.com/search/apis/ipranges/special-crawlers.json',
// User-triggered fetchers (e.g. site verification, feed fetch).
userFetch: 'https://developers.google.com/search/apis/ipranges/user-triggered-fetchers.json',
userFetchGoogle: 'https://developers.google.com/search/apis/ipranges/user-triggered-fetchers-google.json',
// Full Google egress ranges (superset; broader than crawlers alone).
general: 'https://www.gstatic.com/ipranges/goog.json',
};
const BING_JSON_URLS = {
common: 'https://www.bing.com/toolbox/bingbot.json',
};
const DUCKDUCKGO_JSON_URLS = {
common: 'https://duckduckgo.com/duckduckbot.json',
};
// AppleBot ranges — not wired up yet (no BotAgent.AppleBot member exists).
// const APPLE_JSON_URLS = {
// common: 'https://search.developer.apple.com/applebot.json',
// };
// In-memory store of all CIDR ranges, keyed by BotAgent value.
// Every agent starts with an empty list; loadIpRanges() fills them in.
let cidrsByAgent = Object.fromEntries(
    [BotAgent.GoogleBot, BotAgent.BingBot, BotAgent.DuckDuckBot, BotAgent.Unknown].map(
        (agent) => [agent, []],
    ),
);
/**
 * Fetch one JSON list of IP ranges and extract every CIDR prefix.
 *
 * Expected document shape (as published by Google and mirrored by the
 * other engines):
 *   { prefixes: [{ ipv4Prefix?: string, ipv6Prefix?: string }, ...] }
 *
 * @param {string} url - URL of the JSON document to fetch.
 * @returns {Promise<string[]>} All IPv4 and IPv6 CIDR strings found.
 * @throws {Error} If the HTTP response status is not OK.
 */
async function fetchCidrs(url) {
    const res = await fetch(url);
    if (!res.ok) {
        throw new Error(`Failed to fetch ${url}: ${res.status}`);
    }
    const json = await res.json();
    // Single pass over the prefixes (the original scanned the array twice);
    // an entry may carry an ipv4Prefix, an ipv6Prefix, or neither.
    return (json.prefixes ?? []).flatMap((p) =>
        [p.ipv4Prefix, p.ipv6Prefix].filter(Boolean),
    );
}
/**
 * Load (or reload) all agents' IP ranges into memory.
 *
 * Called once at startup (below) and then every 24 hours. Each agent is
 * loaded independently and in parallel: a transient failure for one
 * engine is logged and leaves that agent's previously loaded ranges
 * untouched, while the others still update. Never throws.
 *
 * @returns {Promise<void>}
 */
export async function loadIpRanges() {
    const sources = [
        [BotAgent.GoogleBot, GOOGLE_JSON_URLS],
        [BotAgent.BingBot, BING_JSON_URLS],
        [BotAgent.DuckDuckBot, DUCKDUCKGO_JSON_URLS],
        // TODO: AppleBot (https://search.developer.apple.com/applebot.json)
        // once a BotAgent.AppleBot member exists.
    ];
    await Promise.all(sources.map(async ([agent, urls]) => {
        try {
            const lists = await Promise.all(Object.values(urls).map(fetchCidrs));
            cidrsByAgent[agent] = lists.flat();
        } catch (err) {
            // Keep whatever was loaded before; log which agent failed.
            console.error(`Error loading IP ranges for agent ${agent}:`, err);
        }
    }));
    console.info(`Loaded CIDR ranges.`);
}
// Kick off first load, then refresh every 24h.
// void: rejection is impossible — errors are handled inside loadIpRanges.
void loadIpRanges();
// unref() so this timer alone never keeps the host process alive.
setInterval(loadIpRanges, 24 * 60 * 60 * 1000).unref();
/**
 * Check whether an IP address (v4 or v6) belongs to any of the loaded
 * CIDR ranges for the given agent.
 *
 * @param {string} ip - A bare IP address, e.g. "66.249.66.1" or "2001:4860::1".
 * @param {BotAgent} agent - Which agent's ranges to check against.
 * @returns {boolean} true if the IP falls inside any loaded range
 *   (i.e. it is a genuine crawler/fetcher); false otherwise, including
 *   when the input is unparsable or no ranges are loaded (fail closed).
 */
export function isBotRequest(ip, agent) {
    let addr;
    try {
        // Bug fix: the input is a bare address, not CIDR notation, so the
        // original ipaddr.parseCIDR(ip) threw for every real IP and the
        // function always returned false. process() parses a plain address
        // and also normalizes IPv4-mapped IPv6 (::ffff:a.b.c.d) to IPv4.
        addr = ipaddr.process(ip);
    } catch {
        return false; // unparsable input is never a known bot
    }
    const cidrList = cidrsByAgent[agent];
    if (!cidrList || cidrList.length === 0) {
        console.warn(`No CIDR ranges loaded for agent: ${agent}`);
        return false;
    }
    return cidrList.some((cidr) => {
        try {
            return new IPCIDR(cidr).contains(addr.toString());
        } catch {
            return false; // skip malformed CIDR entries rather than fail the lookup
        }
    });
}