UNPKG

studiocms

Version:

Astro Native CMS for AstroDB. Built from the ground up by the Astro community.

199 lines (198 loc) 6.28 kB
import path from "node:path";

/** Reference shown when a rule violates Google's robots.txt guidance. */
const GOOGLE_ROBOTS_GUIDE_URL =
  "https://developers.google.com/search/docs/crawling-indexing/robots/create-robots-txt#useful-robots.txt-rules";

/** Reference shown for every other validation failure. */
const YANDEX_ROBOTS_GUIDE_URL =
  "https://yandex.com/support/webmaster/controlling-robot/robots-txt.html#recommend";

/** Dotted host name: labels of 1-63 chars, no leading/trailing hyphen, alphabetic TLD, max 253 chars. */
const HOST_PATTERN = /^(?=.{1,253}$)(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[a-zA-Z]{2,63}$/;

/**
 * Absolute http(s) URL ending in a recognised sitemap file extension.
 * The dots in `xml.gz` / `txt.gz` are escaped so that only a literal `.gz`
 * suffix is accepted.
 */
const SITEMAP_URL_PATTERN =
  /^https?:\/\/[^\s/$.?#].[^\s]*\.(xml|txt|html|xml\.gz|txt\.gz|json|xhtml)$/i;

/**
 * Wraps a single value in an array; arrays are returned unchanged.
 *
 * @param {*} value - A single entry or an array of entries.
 * @returns {Array} The entries as an array.
 */
function toList(value) {
  return Array.isArray(value) ? value : [value];
}

/**
 * Ensures `host` is a syntactically valid host name.
 *
 * @param {*} host - Candidate host value.
 * @param {{info: Function, warn: Function}} logger - Receives the failure message.
 * @throws {Error} If `host` is not a string or does not match the host pattern.
 */
function validateHost(host, logger) {
  if (typeof host !== "string") {
    throwMsg("Host must be a string", "error", logger);
  }
  if (!HOST_PATTERN.test(host)) {
    throwMsg("Host is invalid", "error", logger);
  }
}

/**
 * Builds the `Host:` directive line.
 *
 * Booleans, `undefined` and the literal `"localhost"` produce no output.
 *
 * @param {{host?: boolean|string|number}} config - Robots configuration.
 * @param {{info: Function, warn: Function}} logger - Receives failure messages.
 * @returns {string} `"Host: <host>\n"` or an empty string.
 * @throws {Error} If `config.host` is a number or an invalid host string.
 */
function generateHostContent(config, logger) {
  const { host } = config;
  if (typeof host === "number") {
    // Numbers are never valid hosts; validateHost always throws here.
    validateHost(host, logger);
    return "";
  }
  if (typeof host === "string" && host !== "localhost") {
    validateHost(host, logger);
    return `Host: ${host}\n`;
  }
  return "";
}

/**
 * Ensures `url` is an http(s) URL pointing at a sitemap-like file.
 *
 * @param {string} url - Candidate sitemap URL.
 * @param {{info: Function, warn: Function}} logger - Receives the failure message.
 * @throws {Error} If the URL does not match the sitemap URL pattern.
 */
function validateUrl(url, logger) {
  if (!SITEMAP_URL_PATTERN.test(url)) {
    throwMsg("sitemap [URL is invalid or not a valid sitemap file.]", true, logger);
  }
}

/**
 * Builds the `Sitemap:` directive line(s).
 *
 * @param {{sitemap?: boolean|string|string[]|number}} config - Robots configuration.
 * @param {string} siteHref - Site base href; `sitemap-index.xml` is appended verbatim,
 *   so it is expected to end with `/`.
 * @param {{info: Function, warn: Function}} logger - Receives failure messages.
 * @returns {string} One `Sitemap:` line per URL, each newline-terminated, or `""`.
 * @throws {Error} If `config.sitemap` is a number or contains an invalid URL.
 */
function generateSitemapContent(config, siteHref, logger) {
  const { sitemap } = config;
  if (sitemap === true) {
    return `Sitemap: ${siteHref}sitemap-index.xml\n`;
  }
  if (typeof sitemap === "number") {
    throwMsg("sitemap [URL is invalid or not a valid sitemap file.]", true, logger);
    return "";
  }
  if (typeof sitemap === "string") {
    validateUrl(sitemap, logger);
    return `Sitemap: ${sitemap}\n`;
  }
  if (Array.isArray(sitemap)) {
    let content = "";
    for (const url of sitemap) {
      validateUrl(url, logger);
      content += `Sitemap: ${url}\n`;
    }
    return content;
  }
  return "";
}

/**
 * Logs a message and, for every type except `"warn"`, throws.
 *
 * @param {string} msg - Message to report.
 * @param {"warn"|"error"|boolean} type
 *   - `"warn"`: logs through `logger.warn` and returns.
 *   - `"error"`: logs a failure and throws `Error(msg)`.
 *   - `true`: logs a failure and throws with a pointer to Google's robots.txt guide.
 *   - anything else: logs a failure and throws with a pointer to Yandex's guide.
 * @param {{info: Function, warn: Function}} logger - Log sink.
 * @throws {Error} For every `type` other than `"warn"`.
 */
function throwMsg(msg, type, logger) {
  const sentenceHead = "\x1B[1mRefer:\x1B[22m";
  const failure = (message) => {
    logger.info(`\x1B[31mFailure! [${message}]\x1B[39m`);
  };
  const warn = (message) => {
    logger.warn(`Skipped! \n[${message}].`);
  };
  switch (type) {
    case "warn":
      warn(msg);
      break;
    case "error":
      failure(msg);
      throw new Error(`${msg}`);
    case true:
      failure(msg);
      throw new Error(
        `${msg} ${sentenceHead} Visit \x1B[4m${GOOGLE_ROBOTS_GUIDE_URL}\x1B[24m for instructions.`
      );
    default:
      failure(msg);
      throw new Error(
        `${msg} ${sentenceHead} Visit \x1B[4m${YANDEX_ROBOTS_GUIDE_URL}\x1B[24m for instructions.`
      );
  }
}

/**
 * Validates every policy group once, throwing on the first violation.
 *
 * @param {Array<object>} policies - Policy groups to validate.
 * @param {{info: Function, warn: Function}} logger - Receives failure messages.
 * @throws {Error} On a missing user agent, missing allow/disallow rules, or an
 *   out-of-range / non-numeric crawl delay.
 */
function validatePolicies(policies, logger) {
  policies.forEach((policy, index) => {
    // Serialised lazily so valid policies are never stringified.
    const dump = () => JSON.stringify(policy, null, 2);
    const { allow, disallow, crawlDelay } = policy;

    if (!policy.userAgent) {
      throwMsg(
        `policy[${index}].userAgent [Required, one or more per group]. ${dump()}`,
        false,
        logger
      );
    }

    const hasNoRules = !allow && !disallow;
    const hasOnlyEmptyRules = allow?.length === 0 && disallow?.length === 0;
    if (hasNoRules || hasOnlyEmptyRules) {
      throwMsg(
        `policy[${index}] [At least one or more 'disallow' or 'allow' entries per rule]. ${dump()}`,
        hasNoRules,
        logger
      );
    }

    if (crawlDelay && typeof crawlDelay !== "number") {
      throwMsg(`policy[${index}].crawlDelay [Must be number]. ${dump()}`, false, logger);
    } else if (crawlDelay !== undefined && crawlDelay < 0) {
      throwMsg(`policy[${index}].crawlDelay [Must be a positive number]. ${dump()}`, false, logger);
    } else if (crawlDelay !== undefined && (crawlDelay < 0.1 || crawlDelay > 60)) {
      throwMsg(
        `policy[${index}].crawlDelay [Must be between 0.1 and 60 seconds]. ${dump()}`,
        false,
        logger
      );
    }
  });
}

/**
 * Renders the full robots.txt body: policy groups, then sitemap line(s), then host.
 *
 * Every directive is emitted on its own newline-terminated line. Policy groups
 * are separated by a blank line; after the last group a trailing comment is
 * added unless `config.sitemap === false`.
 *
 * @param {{policy?: Array<object>, sitemap?: *, host?: *}} config - Robots configuration.
 * @param {string} siteMapHref - Base href used for the default sitemap URL.
 * @param {{info: Function, warn: Function}} logger - Receives failure messages.
 * @returns {string} The robots.txt content.
 * @throws {Error} If any policy, sitemap or host value fails validation.
 */
function generateContent(config, siteMapHref, logger) {
  const policies = config.policy ?? [];

  // Validate once up front instead of re-validating the whole list per policy.
  validatePolicies(policies, logger);

  let content = "";
  for (const [index, policy] of policies.entries()) {
    if (policy.userAgent) {
      for (const userAgent of toList(policy.userAgent)) {
        if (userAgent) {
          content += `User-agent: ${userAgent}\n`;
        }
      }
    }
    if (policy.allow) {
      for (const allowedPath of toList(policy.allow)) {
        content += `Allow: ${allowedPath}\n`;
      }
    }
    if (policy.disallow) {
      for (const disallowedPath of toList(policy.disallow)) {
        content += `Disallow: ${disallowedPath}\n`;
      }
    }
    if (policy.crawlDelay) {
      content += `Crawl-delay: ${policy.crawlDelay}\n`;
    }
    if (policy.cleanParam) {
      for (const param of toList(policy.cleanParam)) {
        content += `Clean-param: ${param}\n`;
      }
    }

    // Position (not object identity) decides whether this is the final group.
    const isLastPolicy = index === policies.length - 1;
    if (!isLastPolicy) {
      content += "\n";
    } else if (config.sitemap !== false) {
      content += "\n# crawling rule(s) for above bots\n";
    }
  }

  content += generateSitemapContent(config, siteMapHref, logger);
  content += generateHostContent(config, logger);
  return content;
}

/**
 * Reports the generated file's size and build time; prints a size warning to
 * the console when the file exceeds 10 KB.
 *
 * @param {number} fileSize - File size in KB.
 * @param {number} executionTime - Generation time in milliseconds.
 * @param {{info: Function}} logger - Receives the summary line.
 * @param {string} destDir - Output location, reported relative to `process.cwd()`.
 */
function printInfo(fileSize, executionTime, logger, destDir) {
  if (fileSize > 10) {
    console.log(` \x1B[42m\x1B[30m generating 'robots.txt' file \x1B[39m\x1B[0m`);
    const warnMsg = [
      ` \x1B[33m(!) Keep your 'robots.txt' file size under 10 KB for best crawling results.`,
      "- To keep it low, only include directives that are necessary for your site.",
      "- Remove rules for pages that no longer exist to avoid bloat.\x1B[0m\n"
    ];
    console.log(`${warnMsg.join("\n")}`);
  }
  logger.info(
    `\`robots.txt\` (${fileSize}KB) created at \`${path.relative(process.cwd(), destDir)}\` in ${executionTime}ms`
  );
}

export {
  generateContent,
  generateHostContent,
  generateSitemapContent,
  printInfo,
  throwMsg,
  validateHost,
  validateUrl
};