UNPKG

seo-middleware-nextjs

Version:

Next.js SEO — boosting organic traffic from SERPs, AI Chat Bots, and GPT

185 lines (184 loc) 14.7 kB
import { NextResponse } from 'next/server';
import AhoCorasick from 'modern-ahocorasick';

/** Default base URL of the pre-rendering service. */
const RENDERING_ENDPOINT = 'https://render.ostr.io';

/**
 * Default file extensions treated as static assets; a request whose path ends
 * in one of these is never pre-rendered (see `SEOMiddleware#checkStatic`).
 */
export const IGNORE_EXTENSIONS = ['3ds', '3g2', '3gp', '3gpp', '7z', 'a', 'aac', 'aaf', 'adp', 'ai', 'aif', 'aiff', 'alz', 'ape', 'apk', 'appcache', 'ar', 'arj', 'asf', 'asx', 'atom', 'au', 'avchd', 'avi', 'bak', 'bbaw', 'bh', 'bin', 'bk', 'bmp', 'btif', 'bz2', 'bzip2', 'cab', 'caf', 'cco', 'cgm', 'class', 'cmx', 'cpio', 'cr2', 'crt', 'crx', 'css', 'csv', 'cur', 'dat', 'deb', 'der', 'dex', 'djvu', 'dll', 'dmg', 'dng', 'doc', 'docm', 'docx', 'dot', 'dotm', 'dra', 'drc', 'ds_store', 'dsk', 'dts', 'dtshd', 'dvb', 'dwg', 'dxf', 'ear', 'ecelp4800', 'ecelp7470', 'ecelp9600', 'egg', 'eol', 'eot', 'eps', 'epub', 'exe', 'f4a', 'f4b', 'f4p', 'f4v', 'fbs', 'fh', 'fla', 'flac', 'fli', 'flv', 'fpx', 'fst', 'fvt', 'g3', 'geojson', 'gif', 'graffle', 'gz', 'gzip', 'h261', 'h263', 'h264', 'hqx', 'htc', 'ico', 'ief', 'img', 'ipa', 'iso', 'jad', 'jar', 'jardiff', 'jng', 'jnlp', 'jpeg', 'jpg', 'jpgv', 'jpm', 'js', 'jxr', 'key', 'kml', 'kmz', 'ktx', 'less', 'lha', 'lvp', 'lz', 'lzh', 'lzma', 'lzo', 'm2v', 'm3u', 'm4a', 'm4p', 'm4v', 'map', 'manifest', 'mar', 'markdown', 'md', 'mdi', 'mdown', 'mdwn', 'mht', 'mid', 'midi', 'mj2', 'mka', 'mkd', 'mkdn', 'mkdown', 'mkv', 'mml', 'mmr', 'mng', 'mobi', 'mov', 'movie', 'mp2', 'mp3', 'mp4', 'mp4a', 'mpe', 'mpeg', 'mpg', 'mpga', 'mpv', 'msi', 'msm', 'msp', 'mxf', 'mxu', 'nef', 'npx', 'nsv', 'numbers', 'o', 'oex', 'oga', 'ogg', 'ogv', 'opus', 'otf', 'pages', 'pbm', 'pcx', 'pdb', 'pdf', 'pea', 'pem', 'pgm', 'pic', 'pl', 'pm', 'png', 'pnm', 'pot', 'potm', 'potx', 'ppa', 'ppam', 'ppm', 'pps', 'ppsm', 'ppsx', 'ppt', 'pptm', 'pptx', 'prc', 'ps', 'psd', 'pya', 'pyc', 'pyo', 'pyv', 'qt', 'ra', 'rar', 'ras', 'raw', 'rdf', 'rgb', 'rip', 'rlc', 'rm', 'rmf', 'rmvb', 'ron', 'roq', 'rpm', 'rss', 'rtf', 'run', 'rz', 's3m', 's7z', 'safariextz', 'scpt', 'sea', 'sgi', 'shar', 'sil', 'sit', 'slk', 'smv', 'so', 'sub', 'svg',
  'svgz', 'svi', 'swf', 'tar', 'tbz', 'tbz2', 'tcl', 'tga', 'tgz', 'thmx', 'tif', 'tiff', 'tk', 'tlz', 'topojson', 'torrent', 'ttc', 'ttf', 'txt', 'txz', 'udf', 'uvh', 'uvi', 'uvm', 'uvp', 'uvs', 'uvu', 'vcard', 'vcf', 'viv', 'vob', 'vtt', 'war', 'wav', 'wax', 'wbmp', 'wdp', 'weba', 'webapp', 'webm', 'webmanifest', 'webp', 'whl', 'wim', 'wm', 'wma', 'wml', 'wmlc', 'wmv', 'wmx', 'woff', 'woff2', 'wvx', 'xbm', 'xif', 'xla', 'xlam', 'xloc', 'xls', 'xlsb', 'xlsm', 'xlsx', 'xlt', 'xltm', 'xltx', 'xm', 'xmind', 'xml', 'xpi', 'xpm', 'xsl', 'xwd', 'xz', 'yuv', 'z', 'zip', 'zipx'];

/**
 * Default user-agent keywords handed to the Aho-Corasick matcher. They are
 * written in lowercase because the middleware lowercases the incoming
 * `user-agent` header before matching.
 */
export const BOT_AGENTS = ['.net crawler', '360spider', '50.nu', '8bo crawler bot', 'aboundex', 'accoona', 'adldxbot', 'adsbot-google', 'ahrefsbot', 'altavista', 'appengine-google', 'applebot', 'archiver', 'arielisbot', 'ask jeeves', 'auskunftbot', 'baidumobaider', 'baiduspider', 'becomebot', 'bingbot', 'bingpreview', 'bitbot', 'bitlybot', 'blitzbot', 'blogbridge', 'boardreader', 'botseer', 'catchbot', 'catchpoint bot', 'charlotte', 'checklinks', 'cliqzbot', 'clumboot', 'coccocbot', 'converacrawler', 'crawl-e', 'crawlconvera', 'dataparksearch', 'daum', 'deusu', 'developers.google.com/+/web/snippet', 'discordbot', 'dotbot', 'duckduckbot', 'elefent', 'embedly', 'evernote', 'exabot', 'facebookbot', 'facebookexternalhit', 'fatbot', 'fdse robot', 'feed seeker bot', 'feedfetcher', 'femtosearchbot', 'findlinks', 'flamingo_searchengine', 'flipboard', 'followsite bot', 'furlbot', 'fyberspider', 'gaisbot', 'galaxybot', 'geniebot', 'genieo', 'gigablast', 'gigabot', 'girafabot', 'gomezagent', 'gonzo1', 'google sketchup', 'google-structured-data-testing-tool', 'googlebot', 'haosouspider', 'heritrix', 'holmes', 'hoowwwer', 'htdig', 'ia_archiver', 'idbot', 'infuzapp', 'innovazion crawler', 'internetarchive', 'iqdb', 'iskanie', 'istellabot', 'izsearch.com', 'kaloogabot', 'kaz.kz_bot', 'kd bot', 'konqueror', 'kraken', 'kurzor', 'larbin', 'leia', 'lesnikbot', 'linguee bot', 'linkaider', 'linkapediabot',
  'linkedinbot', 'lite bot', 'llaut', 'lookseek', 'lycos', 'mail.ru_bot', 'masidani_bot', 'masscan', 'mediapartners-google', 'metajobbot', 'mj12bot', 'mnogosearch', 'mogimogi', 'mojeekbot', 'motominerbot', 'mozdex', 'msiecrawler', 'msnbot', 'msrbot', 'netpursual', 'netresearch', 'netvibes', 'newsgator', 'ng-search', 'nicebot', 'nutchcvs', 'nuzzel', 'nymesis', 'objectssearch', 'odklbot', 'omgili', 'oovoo', 'oozbot', 'openfosbot', 'orangebot', 'orbiter', 'org_bot', 'outbrain', 'pagepeeker', 'pagesinventory', 'parsijoobot', 'paxleframework', 'peeplo screenshot bot', 'pinterest', 'plantynet_webrobot', 'plukkie', 'pompos', 'psbot', 'quora link preview', 'qwantify', 'read%20later', 'reaper', 'redcarpet', 'redditbot', 'retreiver', 'riddler', 'rival iq', 'rogerbot', 'saucenao', 'scooter', 'scrapy', 'scrubby', 'searchie', 'searchsight', 'seekbot', 'semanticdiscovery', 'seznambot', 'showyoubot', 'simplepie', 'simpy', 'sitelockspider', 'skypeuripreview', 'slack-imgproxy', 'slackbot', 'slurp', 'snappy', 'sogou', 'solofield', 'speedy spider', 'speedyspider', 'sputnikbot', 'stackrambler', 'teeraidbot', 'teoma', 'theusefulbot', 'thumbshots.ru', 'thumbshotsbot', 'tineye', 'toweya.com', 'toweyabot', 'tumblr', 'tweetedtimes', 'tweetmemebot', 'twitterbot', 'url2png', 'vagabondo', 'vebidoobot', 'viber', 'visionutils', 'vkshare', 'voilabot', 'vortex', 'votay bot', 'voyager', 'w3c_validator', 'wasalive.bot', 'web-sniffer', 'websquash.com', 'webthumb', 'whatsapp', 'whatweb', 'wire', 'wotbox', 'yacybot', 'yahoo', 'yandex', 'yeti', 'yisouspider', 'yodaobot', 'yooglifetchagent', 'yoozbot', 'yottaamonitor', 'yowedo', 'zao-crawler', 'zebot_www.ze.bz', 'zooshot', 'zyborg', 'ai2bot', 'amazonbot', 'anthropic.com', 'bard', 'bytespider', 'ccbot', 'chatgpt-user', 'claude-web', 'claudebot', 'cohere-ai', 'deepseek', 'diffbot', 'duckassistbot', 'gemini', 'google-extended', 'gptbot', 'grok', 'meta-external', 'mistralai', 'oai-searchbot', 'openai.com', 'perplexity.ai', 'perplexitybot', 'xai', 'youbot'];

/** Shared string constants and header lists used by the middleware. */
const DICT = {
  escapedFragment: '_escaped_fragment_',
  wellknownPath: '/.well-known/',
  slash: '/',
  pipe: '|',
  dot: '.',
  empty: '',
  colon: ':',
  allowedMethods: new Set(['GET', 'HEAD']),
  args: {
    bot: '&bot=',
  },
  service: {
    origin: 'ostr.io',
    originTld: '.ostr.io',
    authStartsWith: 'Basic ',
  },
  headers: {
    ua: 'user-agent',
    auth: 'authorization',
  },
  // Request headers copied from the incoming request to the rendering request.
  keepReqHeaders: ['user-agent', 'accept-language'],
  // Rendering-service response headers stripped before replying to the client.
  ignoreRespHeaders: ['age', 'alt-svc', 'cache-status', 'cf-connecting-ip', 'cf-ipcountry', 'cf-cache-status', 'cf-ray', 'cf-request-id', 'cnection', 'cneonction', 'connection', 'content-encoding', 'content-length', 'date', 'etag', 'expect-ct', 'expires', 'keep-alive', 'last-modified', 'link', 'nel', 'nncoection', 'pragma', 'server', 'set-cookie', 'status', 'transfer-encoding', 'report-to', 'vary', 'via', 'www-authenticate', 'x-accel-buffering', 'x-accel-charset', 'x-accel-expires', 'x-accel-limit-rate', 'x-accel-redirect', 'x-ostrio-domain', 'x-powered-by', 'x-preprender-status', 'x-prerender-status', 'x-real-ip', 'x-runtime'],
};

const beginningSlashRe = /^\//;
const trailingSlashRe = /\/$/;

/**
 * Next.js middleware factory that forwards crawler requests to a
 * pre-rendering service and serves the rendered response; every other request
 * falls through to `NextResponse.next()`.
 */
export class SEOMiddleware {
  auth;
  supportEscapedFragment;
  keepGetQuery;
  ignoredPaths;
  logger;
  ignoredExtensions;
  botAgents;
  renderingEndpoint;
  renderingBase;
  retries;
  debug;

  /**
   * @param {object} [opts] - Configuration; may be omitted when `auth` is supplied via environment.
   * @param {string} [opts.auth] - `Authorization` header value; must start with `Basic ` and contain no `:`.
   *   Falls back to the `PRERENDER_SERVICE_AUTH`, `SPIDERABLE_SERVICE_AUTH`, then `OSTR_AUTH` env variables.
   * @param {object} [opts.logger=console] - Object exposing `info()` and `warn()`.
   * @param {boolean} [opts.supportEscapedFragment=true] - Honour the `_escaped_fragment_` query parameter.
   * @param {boolean} [opts.keepGetQuery=true] - Forward the query string to the rendering service.
   * @param {RegExp|false} [opts.ignoredPaths=false] - Lowercased paths matching this RegExp are never pre-rendered.
   * @param {Iterable<string>} [opts.ignoredExtensions=IGNORE_EXTENSIONS] - Extensions treated as static files.
   * @param {string[]} [opts.botAgents=BOT_AGENTS] - Keywords passed to the Aho-Corasick matcher.
   * @param {string} [opts.renderingEndpoint] - Rendering service base URL; one trailing slash is trimmed.
   * @param {number} [opts.retries=2] - Upper bound on the attempt counter used by `sendRequest`.
   * @param {boolean} [opts.debug] - Verbose logging; also enabled when `DEBUG === 'true'` in the environment.
   * @throws {Error} When no auth value is available or it is malformed.
   */
  constructor(opts = {}) {
    this.logger = opts.logger ?? console;
    this.auth = opts.auth || process.env.PRERENDER_SERVICE_AUTH || process.env.SPIDERABLE_SERVICE_AUTH || process.env.OSTR_AUTH || '';

    if (!this.auth) {
      throw new Error('[seo-middleware-nextjs] missing {auth} option! Passed via {auth} option or set via PRERENDER_SERVICE_AUTH, SPIDERABLE_SERVICE_AUTH, or OSTR_AUTH environment variable');
    }

    if (!this.auth.startsWith(DICT.service.authStartsWith) || this.auth.includes(DICT.colon)) {
      throw new Error('[seo-middleware-nextjs] {auth} option is misconfigured or wrong value is used!');
    }

    this.supportEscapedFragment = opts.supportEscapedFragment ?? true;
    this.keepGetQuery = opts.keepGetQuery ?? true;
    this.ignoredPaths = opts.ignoredPaths ?? false;
    this.ignoredExtensions = new Set(opts.ignoredExtensions ?? IGNORE_EXTENSIONS);
    this.botAgents = new AhoCorasick(opts.botAgents ?? BOT_AGENTS);

    this.renderingEndpoint = opts.renderingEndpoint ?? RENDERING_ENDPOINT;
    if (this.renderingEndpoint.endsWith(DICT.slash)) {
      this.renderingEndpoint = this.renderingEndpoint.slice(0, -1);
    }
    this.renderingBase = `${this.renderingEndpoint}/?url=`;

    this.retries = opts.retries ?? 2;
    this.debug = opts.debug || process.env.DEBUG === 'true' || false;
    this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [new SEOMiddleware()]', 'OK', opts);
  }

  /**
   * Build the middleware function to export from a Next.js `middleware` file.
   *
   * @returns {function(object): Promise<object>} Async middleware taking a request that exposes
   *   `nextUrl`, `headers` and `method`, and resolving to a `NextResponse`.
   */
  createMiddleware() {
    const self = this;
    this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [createMiddleware]', 'OK');

    return async function middleware(req) {
      const url = req.nextUrl.clone();
      self.debug && self.logger.info('[seo-middleware-nextjs] [DEBUG] [middleware]', 'received request', url.toString());

      const escapedFragment = url.searchParams.get(DICT.escapedFragment);
      // The escaped-fragment flow applies only when the feature is enabled AND
      // the parameter is present; otherwise the request takes the regular bot
      // flow, which must pass BOTH shouldPrerender() and checkBot().
      const useEscapedFragment = self.supportEscapedFragment && typeof escapedFragment === 'string';
      const ua = (req.headers.get(DICT.headers.ua) || DICT.empty).toLowerCase();
      let fetchUrl = `${url.origin}`;

      if (useEscapedFragment) {
        url.searchParams.delete(DICT.escapedFragment);
        const basePath = url.pathname.replace(trailingSlashRe, DICT.empty);
        if (escapedFragment.length) {
          fetchUrl += `${basePath}${DICT.slash}${escapedFragment.replace(beginningSlashRe, DICT.empty)}`;
        } else {
          fetchUrl += basePath;
        }

        // Validate the URL that will actually be rendered (path + fragment).
        if (!self.shouldPrerender(req, new URL(fetchUrl))) {
          return NextResponse.next();
        }
      } else {
        if (!self.shouldPrerender(req, url) || !self.checkBot(ua)) {
          return NextResponse.next();
        }
        fetchUrl += url.pathname;
      }

      if (self.keepGetQuery && url.search && url.search.length > 1) {
        fetchUrl += url.search;
      }

      const headers = new Headers();
      headers.set(DICT.headers.auth, self.auth);
      for (const h of DICT.keepReqHeaders) {
        if (req.headers.has(h)) {
          headers.set(h, req.headers.get(h));
        }
      }

      const renderUrl = new URL(`${self.renderingBase}${encodeURIComponent(fetchUrl)}${DICT.args.bot}${encodeURIComponent(ua)}`);

      try {
        const res = await self.sendRequest(renderUrl, headers, 1);
        if (res) {
          const outHeaders = new Headers(res.headers);
          for (const h of DICT.ignoreRespHeaders) {
            outHeaders.delete(h);
          }

          return new NextResponse(res.body, {
            status: res.status,
            headers: outHeaders,
          });
        }

        // sendRequest() gave up after its retries; serve the page normally.
        return NextResponse.next();
      } catch (fetchError) {
        self.logger.warn('[seo-middleware-nextjs] [WARN] [middleware]', url.toString(), 'CAUGHT ERROR; FALLBACK TO NextResponse.next()', fetchError);
        return NextResponse.next();
      }
    };
  }

  /**
   * Tell whether a path points at a static file, judged by its extension.
   *
   * @param {string} path - URL pathname; compared as-is (callers in this class pass it lowercased).
   * @returns {boolean} `true` when the last path segment has an extension listed in `ignoredExtensions`.
   */
  checkStatic(path) {
    const lastDot = path.lastIndexOf(DICT.dot);
    const lastSlash = path.lastIndexOf(DICT.slash);
    // Only a dot located after the last slash belongs to the file name.
    const ext = lastDot > lastSlash ? path.slice(lastDot + 1) : DICT.empty;
    if (ext && this.ignoredExtensions.has(ext)) {
      return true;
    }
    return false;
  }

  /**
   * Tell whether a user-agent string matches the configured bot keywords.
   *
   * @param {string} ua - User-agent value (the middleware passes it lowercased).
   * @returns {boolean} `false` for an empty value or when the matcher returns a falsy result.
   */
  checkBot(ua) {
    if (!ua || !this.botAgents.match(ua)) {
      this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [checkBot]', 'IGNORED: NOT A BOT');
      return false;
    }
    return true;
  }

  /**
   * Apply the method, host, `.well-known`, static-file and ignored-path filters.
   *
   * @param {object} req - Request exposing `method`.
   * @param {URL} url - URL under evaluation (exposes `pathname`, `hostname`, `toString()`).
   * @returns {boolean} `true` when none of the exclusion rules match.
   */
  shouldPrerender(req, url) {
    if (!DICT.allowedMethods.has(req.method)) {
      this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [shouldPrerender]', url.toString(), 'METHOD NOT ALLOWED');
      return false;
    }

    const path = url.pathname.toLowerCase();
    if (url.hostname === DICT.service.origin || url.hostname.endsWith(DICT.service.originTld) || path.includes(DICT.wellknownPath)) {
      this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [shouldPrerender]', url.toString(), 'IGNORED: GENERAL RULES');
      return false;
    }

    if (this.checkStatic(path)) {
      this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [shouldPrerender]', url.toString(), 'IGNORED: IS STATIC');
      return false;
    }

    if (this.ignoredPaths instanceof RegExp) {
      // A RegExp with the `g` or `y` flag keeps `lastIndex` between calls,
      // which would make results depend on the previous request; reset it.
      this.ignoredPaths.lastIndex = 0;
      if (this.ignoredPaths.test(path)) {
        this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [shouldPrerender]', url.toString(), 'IGNORED: BY PATH');
        return false;
      }
    }

    this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [shouldPrerender]', url.toString(), 'WILL PRERENDER');
    return true;
  }

  /**
   * Fetch a URL from the rendering service, retrying on rejected requests.
   *
   * Each attempt gets its own 25-second abort timeout. A retry is made while
   * the next attempt number does not exceed `this.retries`.
   *
   * @param {URL|string} url - Fully built rendering-service URL.
   * @param {Headers} headers - Headers to send with the request.
   * @param {number} [attempt=1] - 1-based number of the current attempt.
   * @returns {Promise<Response|null>} The response, or `null` once attempts are exhausted.
   */
  async sendRequest(url, headers, attempt = 1) {
    const signal = AbortSignal.timeout(25000);
    this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [sendRequest]', url.toString(), 'SENDING');

    try {
      const res = await fetch(url, {
        headers,
        signal,
      });
      return res;
    } catch (requestError) {
      this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [sendRequest]', url.toString(), 'CAUGHT AN ERROR', requestError);

      const nextAttempt = attempt + 1;
      if (nextAttempt <= this.retries) {
        this.debug && this.logger.info('[seo-middleware-nextjs] [DEBUG] [sendRequest]', url.toString(), 'RETRY REQUEST');
        return await this.sendRequest(url, headers, nextAttempt);
      }

      this.logger.warn('[seo-middleware-nextjs] [WARN] [sendRequest]', url.toString(), 'CAUGHT AN ERROR AFTER RETRY', requestError);
    }

    return null;
  }
}