/*
 * studiocms
 * Version:
 * Astro Native CMS for AstroDB. Built from the ground up by the Astro community.
 * 199 lines (198 loc) • 6.28 kB
 * JavaScript
 */
import path from "node:path";
/**
 * Checks that `host` is a string shaped like a fully qualified domain name.
 *
 * Each failed check is reported through `throwMsg` with the "error" type.
 * The two checks run one after the other; this function never returns early
 * on its own.
 *
 * @param {unknown} host - Value to validate.
 * @param {object} logger - Logger forwarded untouched to `throwMsg`.
 * @returns {void}
 */
function validateHost(host, logger) {
  const isString = typeof host === "string";
  if (!isString) {
    throwMsg("Host must be a string", "error", logger);
  }

  // Dot-separated labels of 1-63 chars that neither start nor end with "-",
  // an alphabetic final label of 2-63 chars, and at most 253 chars overall.
  const matchesHostname =
    /^(?=.{1,253}$)(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[a-zA-Z]{2,63}$/.test(host);
  if (!matchesHostname) {
    throwMsg("Host is invalid", "error", logger);
  }
}
/**
 * Produces the `Host:` line of the robots.txt output, if any.
 *
 * - A numeric `config.host` is handed to `validateHost` and yields no output.
 * - A string other than "localhost" is validated and emitted as
 *   `Host: <value>` followed by a newline.
 * - Everything else (booleans, "localhost", missing values, ...) yields "".
 *
 * @param {{ host?: unknown }} config - Options object; only `host` is read.
 * @param {object} logger - Logger forwarded to `validateHost`.
 * @returns {string} The host section, or an empty string.
 */
function generateHostContent(config, logger) {
  const { host } = config;

  switch (typeof host) {
    case "number":
      validateHost(host, logger);
      return "";
    case "string":
      if (host === "localhost") {
        return "";
      }
      validateHost(host, logger);
      return `Host: ${host}\n`;
    default:
      return "";
  }
}
/**
 * Checks that `url` is an absolute http(s) URL ending in one of the accepted
 * sitemap file extensions (xml, txt, html, xml.gz, txt.gz, json, xhtml;
 * case-insensitive).
 *
 * A rejected URL is reported through `throwMsg` with type `true`.
 *
 * @param {string} url - Candidate sitemap URL.
 * @param {object} logger - Logger forwarded untouched to `throwMsg`.
 * @returns {void}
 */
function validateUrl(url, logger) {
  // The dots inside "xml.gz" / "txt.gz" are escaped so they match a literal
  // "." only; unescaped they matched any character (e.g. ".xmlzgz").
  const urlPattern =
    /^https?:\/\/[^\s/$.?#].[^\s]*\.(xml|txt|html|xml\.gz|txt\.gz|json|xhtml)$/i;
  if (!urlPattern.test(url)) {
    throwMsg("sitemap [URL is invalid or not a valid sitemap file.]", true, logger);
  }
}
/**
 * Produces the `Sitemap:` lines of the robots.txt output.
 *
 * - `config.sitemap === true`: a single line pointing at `sitemap-index.xml`
 *   under `siteHref`.
 * - a number: reported through `throwMsg` as an invalid sitemap URL.
 * - a string: validated with `validateUrl` and emitted as one line.
 * - an array: every entry is validated and emitted, in order.
 * - anything else: no output.
 *
 * @param {{ sitemap?: unknown }} config - Options object; only `sitemap` is read.
 * @param {string} siteHref - Base URL used when `sitemap` is `true`.
 * @param {object} logger - Logger forwarded to `validateUrl` / `throwMsg`.
 * @returns {string} Zero or more newline-terminated `Sitemap:` lines.
 */
function generateSitemapContent(config, siteHref, logger) {
  const { sitemap } = config;

  if (sitemap === true) {
    // A base such as "https://example.com/docs" has no trailing slash; without
    // adding one the result would be ".../docssitemap-index.xml".
    const needsSlash =
      typeof siteHref === "string" && siteHref !== "" && !siteHref.endsWith("/");
    const base = needsSlash ? `${siteHref}/` : siteHref;
    return `Sitemap: ${base}sitemap-index.xml\n`;
  }

  if (typeof sitemap === "number") {
    throwMsg("sitemap [URL is invalid or not a valid sitemap file.]", true, logger);
    return "";
  }

  if (typeof sitemap === "string") {
    validateUrl(sitemap, logger);
    return `Sitemap: ${sitemap}\n`;
  }

  let content = "";
  if (Array.isArray(sitemap)) {
    for (const url of sitemap) {
      validateUrl(url, logger);
      content += `Sitemap: ${url}\n`;
    }
  }
  return content;
}
/**
 * Reports a configuration problem through `logger` and, unless the problem
 * is only a warning, throws.
 *
 * - `type === "warn"`: logs a "Skipped!" line via `logger.warn` and returns.
 * - `type === "error"`: logs a red "Failure!" line via `logger.info` and
 *   throws an `Error` carrying just `msg`.
 * - `type === true`: same log line; the thrown message also points at
 *   Google's robots.txt guide.
 * - any other value: same log line; the thrown message points at Yandex's
 *   robots.txt guide.
 *
 * @param {string} msg - Description of the problem.
 * @param {"warn" | "error" | boolean} type - Selects the behavior above.
 * @param {{ info: Function, warn: Function }} logger - Receives the log line.
 * @returns {void} Only returns for the "warn" type.
 * @throws {Error} For every type other than "warn".
 */
function throwMsg(msg, type, logger) {
  if (type === "warn") {
    logger.warn(`Skipped! [${msg}].`);
    return;
  }

  // Every remaining type is a hard failure: log first, then throw.
  logger.info(`\x1B[31mFailure! [${msg}]\x1B[39m`);

  if (type === "error") {
    throw new Error(`${msg}`);
  }

  const guideUrl =
    type === true
      ? "https://developers.google.com/search/docs/crawling-indexing/robots/create-robots-txt#useful-robots.txt-rules"
      : "https://yandex.com/support/webmaster/controlling-robot/robots-txt.html#recommend";
  const referHeading = "\x1B[1mRefer:\x1B[22m";

  throw new Error(
    `${msg}\n${referHeading}\nVisit \x1B[4m${guideUrl}\x1B[24m for instructions.`
  );
}
/**
 * Builds the complete robots.txt body.
 *
 * Every entry of `config.policy` is validated first (problems are reported
 * through `throwMsg`), then rendered as a group of `User-agent`, `Allow`,
 * `Disallow`, `Crawl-delay` and `Clean-param` lines. Groups are separated by
 * a blank line; after the last group a "# crawling rule(s) for above bots"
 * comment is appended unless `config.sitemap === false`. The output of
 * `generateSitemapContent` and `generateHostContent` is appended at the end.
 *
 * @param {object} config - Options; `policy`, `sitemap` are read here and the
 *   whole object is forwarded to the sitemap/host generators.
 * @param {string} siteMapHref - Base URL forwarded to `generateSitemapContent`.
 * @param {object} logger - Logger forwarded to `throwMsg` and the generators.
 * @returns {string} The robots.txt text.
 */
function generateContent(config, siteMapHref, logger) {
  const policies = config.policy ?? [];
  const toList = (value) => (Array.isArray(value) ? value : [value]);

  // Validate each policy exactly once, before anything is rendered. The
  // checks used to be repeated for the whole list on every rendered policy.
  policies.forEach((policy, index) => {
    // Serialized lazily: only needed when a check fails.
    const dump = () => JSON.stringify(policy, null, 2);

    if (!policy.userAgent) {
      throwMsg(
        `policy[${index}].userAgent [Required, one or more per group].\n${dump()}`,
        false,
        logger
      );
    }

    const lacksRules = !policy.allow && !policy.disallow;
    const hasOnlyEmptyRules =
      policy.allow?.length === 0 && policy.disallow?.length === 0;
    if (lacksRules || hasOnlyEmptyRules) {
      // The type flag (true when both keys are missing) selects which
      // reference link `throwMsg` includes.
      throwMsg(
        `policy[${index}] [At least one or more 'disallow' or 'allow' entries per rule].\n${dump()}`,
        lacksRules,
        logger
      );
    }

    const { crawlDelay } = policy;
    if (crawlDelay && typeof crawlDelay !== "number") {
      throwMsg(`policy[${index}].crawlDelay [Must be number].\n${dump()}`, false, logger);
    } else if (crawlDelay !== undefined && crawlDelay < 0) {
      throwMsg(
        `policy[${index}].crawlDelay [Must be a positive number].\n${dump()}`,
        false,
        logger
      );
    } else if (crawlDelay !== undefined && (crawlDelay < 0.1 || crawlDelay > 60)) {
      throwMsg(
        `policy[${index}].crawlDelay [Must be between 0.1 and 60 seconds].\n${dump()}`,
        false,
        logger
      );
    }
  });

  let content = "";
  const lastIndex = policies.length - 1;

  for (const [index, policy] of policies.entries()) {
    if (policy.userAgent) {
      for (const userAgent of toList(policy.userAgent)) {
        // Falsy entries inside a user-agent list are skipped.
        if (userAgent) {
          content += `User-agent: ${userAgent}\n`;
        }
      }
    }
    if (policy.allow) {
      for (const allowedPath of toList(policy.allow)) {
        content += `Allow: ${allowedPath}\n`;
      }
    }
    if (policy.disallow) {
      for (const disallowedPath of toList(policy.disallow)) {
        content += `Disallow: ${disallowedPath}\n`;
      }
    }
    if (policy.crawlDelay) {
      content += `Crawl-delay: ${policy.crawlDelay}\n`;
    }
    if (policy.cleanParam) {
      for (const param of toList(policy.cleanParam)) {
        content += `Clean-param: ${param}\n`;
      }
    }

    // Position, not object identity, decides which group is last, so the
    // same policy object may appear more than once in the list.
    if (index !== lastIndex) {
      content += "\n";
    } else if (config.sitemap !== false) {
      content += "\n# crawling rule(s) for above bots\n";
    }
  }

  content += generateSitemapContent(config, siteMapHref, logger);
  content += generateHostContent(config, logger);
  return content;
}
/**
 * Logs a summary line about the generated robots.txt file.
 *
 * When `fileSize` is greater than 10, a highlighted banner and a short list
 * of size-reduction tips are first printed with `console.log`. The summary
 * itself always goes to `logger.info` and shows `destDir` relative to the
 * current working directory.
 *
 * @param {number} fileSize - File size, shown with a "KB" suffix.
 * @param {number} executionTime - Duration, shown with an "ms" suffix.
 * @param {{ info: Function }} logger - Receives the summary line.
 * @param {string} destDir - Location of the generated file.
 * @returns {void}
 */
function printInfo(fileSize, executionTime, logger, destDir) {
  const exceedsRecommendedSize = fileSize > 10;

  if (exceedsRecommendedSize) {
    const banner = "\n\x1B[42m\x1B[30m generating 'robots.txt' file \x1B[39m\x1B[0m";
    console.log(banner);

    const headline =
      "\n\x1B[33m(!) Keep your 'robots.txt' file size under 10 KB for best crawling results.";
    const tipKeepLow =
      "- To keep it low, only include directives that are necessary for your site.";
    const tipPrune = "- Remove rules for pages that no longer exist to avoid bloat.\x1B[0m\n";
    console.log(`${headline}\n${tipKeepLow}\n${tipPrune}`);
  }

  const relativeDest = path.relative(process.cwd(), destDir);
  logger.info(
    `\`robots.txt\` (${fileSize}KB) created at \`${relativeDest}\` in ${executionTime}ms`
  );
}
export {
generateContent,
generateHostContent,
generateSitemapContent,
printInfo,
throwMsg,
validateHost,
validateUrl
};