llmoptimizer
Version:
Generate an llms.txt summary of your website/docs for LLMs (framework-agnostic with Vite/Next/Nuxt/Astro/Remix helpers).
442 lines (439 loc) • 18.4 kB
JavaScript
;
// Short aliases for the Object built-ins that the bundler helper functions
// below (__export, __copyProps, __toESM, __toCommonJS) rely on.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines one enumerable accessor property on `target` for every key found in
// `all`; the function stored under that key in `all` is used as the getter.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Mirrors the own properties of `from` onto `to` as getters that read through
// to `from[key]` at access time. Keys that `to` already owns, and the key equal
// to `except`, are skipped. Nothing is copied unless `from` is a truthy object
// or a function. Returns `to`.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
// A key with no descriptor is treated as enumerable; otherwise the source's
// own `enumerable` flag is reused.
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Wraps a required module so it can be consumed like an ES module namespace.
// The wrapper is a new object sharing `mod`'s prototype (or a plain `{}` when
// `mod` is null/undefined); `mod`'s own properties are then mirrored onto it
// through __copyProps.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
// Builds the CommonJS export object: a fresh object carrying a
// (non-enumerable, since only `value` is given) `__esModule: true` marker,
// onto which the own properties of `mod` are mirrored as getters.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/integrations/docs.ts
// Export table for this module. `docsLLMs` is registered as a getter, so the
// function declaration further down is resolved when the export is read.
var docs_exports = {};
__export(docs_exports, {
docsLLMs: () => docsLLMs
});
// Publish the table as this file's CommonJS exports.
module.exports = __toCommonJS(docs_exports);
var import_node_path2 = __toESM(require("path"), 1);
var import_promises2 = __toESM(require("fs/promises"), 1);
var import_globby = require("globby");
// src/integrations/docs-helpers.ts
var import_node_path = __toESM(require("path"), 1);
var import_promises = __toESM(require("fs/promises"), 1);
/**
 * Inlines local Markdown/MDX partials into a document.
 *
 * Every line of the form `import Name from './relative/path'` is removed from
 * `raw`. When the imported path points at an underscore-prefixed `.md`/`.mdx`
 * file (e.g. `./_intro.mdx`), that file is read, processed recursively, and
 * every self-closing `<Name />` usage is replaced with its content. Imports of
 * other relative files are stripped without being inlined.
 *
 * @param {string} raw - Source text of the document.
 * @param {string} baseDir - Directory that relative import paths resolve against.
 * @param {Map<string, string>} cache - Resolved partial content keyed by
 *   absolute path; shared across calls so each partial is read only once.
 * @returns {Promise<string>} The document with partial imports removed and
 *   their usages expanded. Unreadable partials expand to an empty string.
 */
async function inlineLocalPartials(raw, baseDir, cache) {
  const importRegex = /^\s*import\s+([A-Za-z0-9_]+)\s+from\s+['"](\.\/[^'"\n]+)['"];?\s*$/gm;
  const partialPathRe = /\/_[^/]+\.(md|mdx)$/i;
  const imports = [];
  // The capture groups are handed to the callback directly, so the matched
  // line does not have to be parsed a second time.
  const replaced = raw.replace(importRegex, (_line, name, rel) => {
    if (partialPathRe.test(rel)) {
      imports.push({ name, abs: import_node_path.default.resolve(baseDir, rel), content: "" });
    }
    return "";
  });
  for (const imp of imports) {
    if (!cache.has(imp.abs)) {
      // Reserve the cache slot before recursing: partials that import each
      // other then resolve to "" on re-entry instead of recursing forever.
      cache.set(imp.abs, "");
      try {
        const txt = await import_promises.default.readFile(imp.abs, "utf8");
        cache.set(imp.abs, await inlineLocalPartials(txt, import_node_path.default.dirname(imp.abs), cache));
      } catch {
        // Best effort: a missing or unreadable partial keeps the "" placeholder.
      }
    }
    imp.content = cache.get(imp.abs) || "";
  }
  let out = replaced;
  for (const imp of imports) {
    const usageRe = new RegExp(`<${imp.name}\\s*/>`, "g");
    // A replacer function is used so `$&`, `$1`, `$$`, ... inside the partial
    // are inserted literally rather than interpreted as replacement patterns.
    out = out.replace(usageRe, () => imp.content);
  }
  return out;
}
/**
 * Applies the optional clean-up passes to a document's raw text.
 *
 * @param {string} raw - Document text.
 * @param {{excludeImports?: boolean, removeDuplicateHeadings?: boolean}} [opts]
 *   - `excludeImports`: drop single-line `import ...` statements and
 *     `export const|let|var|default ...` lines.
 *   - `removeDuplicateHeadings`: drop a line whose trimmed text repeats the
 *     `# ` heading right above it (blank lines in between are allowed).
 * @returns {string} The cleaned text. With `removeDuplicateHeadings` the line
 *   endings are normalised to `\n`.
 */
function cleanContent(raw, opts = {}) {
  let s = raw;
  if (opts.excludeImports) {
    // `(?:\n|$)` instead of a mandatory `\n`, so a statement on the final
    // line of a file without a trailing newline is removed as well.
    s = s
      .replace(/^\s*import\s+[^\n]+(?:\n|$)/gm, "")
      .replace(/^\s*export\s+(const|let|var|default)\s+[^\n]*(?:\n|$)/gm, "");
  }
  if (opts.removeDuplicateHeadings) {
    const lines = s.split(/\r?\n/);
    const out = [];
    let lastHeading = null;
    for (const ln of lines) {
      const h = ln.match(/^\s*#\s+(.+)$/);
      if (h) {
        lastHeading = h[1].trim();
        out.push(ln);
        continue;
      }
      // Skip a body line that merely repeats the heading text.
      if (lastHeading && ln.trim() === lastHeading) continue;
      out.push(ln);
      // Any other non-blank line ends the window in which a repeat is dropped.
      if (ln.trim()) lastHeading = null;
    }
    s = out.join("\n");
  }
  return s;
}
/**
 * Splits a document into its metadata and body.
 *
 * A leading `---` ... `---` block is treated as frontmatter and removed from
 * the body; `title:` and `description:` lines inside it are read (surrounding
 * quotes stripped). If no title is found there, the first `# ` heading of the
 * body is used, and finally the literal "Untitled".
 *
 * @param {string} s - Document text.
 * @returns {{title: string, description: (string|undefined), content: string}}
 *   `content` is the trimmed body without the frontmatter block.
 */
function extractFrontmatterAndTitle(s) {
  let title = "";
  let description;
  let body = s;
  // Accept CRLF line endings and a leading BOM, so files saved on Windows do
  // not leak their frontmatter into the content.
  const fm = s.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?/);
  if (fm) {
    body = s.slice(fm[0].length);
    const block = fm[1];
    const t = block.match(/^\s*title:\s*(.+)\s*$/m);
    const d = block.match(/^\s*description:\s*(.+)\s*$/m);
    if (t) title = stripQuotes(t[1].trim());
    if (d) description = stripQuotes(d[1].trim());
  }
  if (!title) {
    // Fall back to the first level-1 heading in the body.
    const h1 = body.match(/^\s*#\s+(.+)$/m);
    if (h1) title = h1[1].trim();
  }
  if (!title) title = "Untitled";
  return { title, description, content: body.trim() };
}
/**
 * Removes one pair of matching surrounding quotes (`"..."` or `'...'`).
 *
 * @param {string} s - Value to unquote.
 * @returns {string} The inner text when `s` is wrapped in a matching pair of
 *   quotes, otherwise `s` unchanged.
 */
function stripQuotes(s) {
  // At least two characters are required: a lone quote is both the first and
  // the last character, but it is not a quoted string.
  if (s.length < 2) return s;
  const quote = s[0];
  if ((quote === '"' || quote === "'") && s.endsWith(quote)) return s.slice(1, -1);
  return s;
}
/**
 * Derives the URL path ("/a/b/page") for a source file.
 *
 * The file path is taken relative to `ctx.root`, leading segments equal to
 * `ctx.docsDir` / `ctx.blogDir` are dropped, segments listed in
 * `ctx.ignorePaths` are removed, `ctx.addPaths` are prepended, and the
 * `.md`/`.mdx` extension is stripped from the final segment.
 *
 * Note: the docs/blog directories are compared by exact string equality with
 * a single path segment, so an absolute or multi-segment directory value
 * never matches and nothing is dropped for it.
 *
 * @param {string} abs - Absolute path of the source file.
 * @param {{root: string, docsDir: string, blogDir?: string,
 *   ignorePaths?: string[], addPaths?: string[]}} ctx
 * @returns {string} URL path starting with "/".
 */
function toUrlPath(abs, ctx) {
  const fromRoot = import_node_path.default.relative(ctx.root, abs).replace(/\\/g, "/");
  let parts = fromRoot.split("/");

  // Collect the non-empty directory names that may be stripped from the front.
  const droppable = [];
  const docsName = ctx.docsDir.replace(/\\/g, "/");
  if (docsName) droppable.push(docsName);
  const blogName = ctx.blogDir?.replace(/\\/g, "/");
  if (blogName) droppable.push(blogName);
  while (droppable.includes(parts[0])) {
    parts.shift();
  }

  if (ctx.ignorePaths?.length) {
    parts = parts.filter((part) => !ctx.ignorePaths.includes(part));
  }
  if (ctx.addPaths?.length) {
    parts = [...ctx.addPaths, ...parts];
  }

  // Only the final segment carries the file extension.
  const stem = (parts.pop() || "").replace(/\.(md|mdx)$/i, "");
  return "/" + [...parts, stem].filter(Boolean).join("/");
}
/**
 * Prefixes a slug path with the site origin and base URL.
 *
 * @param {{url?: string, baseUrl?: string}} cfg - Site configuration.
 * @param {string} slugPath - Path beginning with "/".
 * @returns {string} `cfg.url + cfg.baseUrl + slugPath` with a single trailing
 *   slash trimmed from the origin and from the combined prefix, or `slugPath`
 *   alone when neither is configured.
 */
function toSiteUrl(cfg, slugPath) {
  const origin = cfg.url ? cfg.url.replace(/\/$/, "") : "";
  const prefix = origin + (cfg.baseUrl || "");
  if (!prefix) {
    return slugPath;
  }
  return prefix.replace(/\/$/, "") + slugPath;
}
/**
 * Converts a simple glob pattern into an anchored regular expression.
 *
 * Supported wildcards:
 * - `**` followed by `/` matches zero or more whole directories, so
 *   `docs/**` + `/*.md` also matches a file directly inside `docs`.
 * - `**` elsewhere matches any characters, including "/".
 * - `*` matches any run of characters except "/".
 * - `?` matches exactly one character except "/".
 * Every other regex metacharacter is matched literally.
 *
 * @param {string} g - Glob pattern.
 * @returns {RegExp} Expression matching the whole input string.
 */
function globToRegex(g) {
  // A single pass over the pattern: longer tokens are listed first so `**/`
  // wins over `**`, which wins over `*`.
  const source = g.replace(/\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g, (token) => {
    switch (token) {
      case "**/":
        return "(?:.*/)?";
      case "**":
        return ".*";
      case "*":
        return "[^/]*";
      case "?":
        return "[^/]";
      default:
        // Regex metacharacter with no glob meaning: escape it.
        return "\\" + token;
    }
  });
  return new RegExp("^" + source + "$");
}
/**
 * Orders documents by a list of glob patterns.
 *
 * Patterns are applied in sequence; each document is placed at the first
 * pattern that matches its `relPath` or `url`, keeping the input order within
 * a pattern. Documents matching no pattern are appended at the end when
 * `includeUnmatchedLast` is truthy and dropped otherwise.
 *
 * @param {Array<{relPath: string, url: string}>} docs
 * @param {string[]} [includeOrder] - Glob patterns; empty or missing means
 *   "keep the input order".
 * @param {boolean} [includeUnmatchedLast]
 * @returns {Array} A new array; `docs` is not modified.
 */
function orderDocs(docs, includeOrder, includeUnmatchedLast) {
  if (!includeOrder?.length) {
    return docs.slice();
  }
  const pending = new Set(docs);
  const ranked = [];
  for (const pattern of includeOrder) {
    const matcher = globToRegex(pattern);
    for (const doc of docs) {
      // Already placed by an earlier pattern (or earlier in this pass).
      if (!pending.has(doc)) continue;
      if (!matcher.test(doc.relPath) && !matcher.test(doc.url)) continue;
      ranked.push(doc);
      pending.delete(doc);
    }
  }
  return includeUnmatchedLast ? [...ranked, ...pending] : ranked;
}
/**
 * Selects the documents matched by the include globs and not by the ignore
 * globs. A pattern matches a document when it matches its `relPath` or `url`.
 *
 * @param {Array<{relPath: string, url: string}>} docs
 * @param {string[]} includePatterns - A document must match at least one.
 * @param {string[]} [ignorePatterns] - A document must match none.
 * @returns {Array} Filtered documents in their original order.
 */
function filterDocs(docs, includePatterns, ignorePatterns) {
  const includeRes = includePatterns.map(globToRegex);
  const ignoreRes = (ignorePatterns || []).map(globToRegex);
  const matchesAny = (regexes, doc) => regexes.some((re) => re.test(doc.relPath) || re.test(doc.url));
  return docs.filter((doc) => matchesAny(includeRes, doc) && !matchesAny(ignoreRes, doc));
}
/**
 * Turns a title into a lowercase, hyphen-separated ASCII slug.
 *
 * @param {string} s - Text to slugify.
 * @returns {string} Slug made of `a-z`, `0-9` and single hyphens; every other
 *   character is dropped, so the result may be empty.
 */
function slugify(s) {
  const lowered = s.toLowerCase().trim();
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, "");
  const hyphenated = allowedOnly.replace(/\s+/g, "-");
  // Collapse runs such as "a---b" that spaces around hyphens produce.
  return hyphenated.replace(/-+/g, "-");
}
/**
 * Collects the text of every level 1-4 ATX heading (`#` to `####`).
 *
 * Lines inside fenced code blocks (``` or ~~~) are ignored, so shell or
 * Python comments such as `# install deps` are not counted as headings.
 *
 * @param {string} content - Markdown text.
 * @returns {string[]} Trimmed heading texts in document order.
 */
function headingsFrom(content) {
  const headings = [];
  // Marker character of the currently open fence, or null outside a fence.
  let openFence = null;
  for (const line of content.split(/\r?\n/)) {
    const fence = line.match(/^\s*(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1][0];
      if (openFence === null) openFence = marker;
      // Only the same marker character closes a fence.
      else if (openFence === marker) openFence = null;
      continue;
    }
    if (openFence !== null) continue;
    const m = line.match(/^\s*#{1,4}\s+(.+)$/);
    if (m) headings.push(m[1].trim());
  }
  return headings;
}
/**
 * Counts the words in Markdown text.
 *
 * Closed fenced code blocks are excluded and common Markdown punctuation
 * (backticks, emphasis markers, brackets, `#`, `>`, `-`, `!`) is treated as
 * whitespace before splitting.
 *
 * @param {string} content - Markdown text.
 * @returns {number} Number of whitespace-separated tokens that remain.
 */
function wordCount(content) {
  return content
    .replace(/```[\s\S]*?```/g, " ")
    .replace(/[`*_>#\-\[\]\(\)!]/g, " ")
    .split(/\s+/)
    .filter(Boolean).length;
}
/**
 * Estimates the token count of Markdown text as 1.3 tokens per word.
 *
 * @param {string} content - Markdown text.
 * @returns {number} Rounded estimate, never below zero.
 */
function tokenEstimate(content) {
  const tokensPerWord = 1.3;
  const words = wordCount(content);
  return Math.max(0, Math.round(words * tokensPerWord));
}
/**
 * Renders the link index (llms.txt style): title, optional description,
 * version and root content, then one bullet list of links per section.
 *
 * @param {object} ctx
 * @param {string} ctx.title
 * @param {string} [ctx.description] - Emitted as a block quote.
 * @param {string} [ctx.version]
 * @param {string} [ctx.rootContent] - Free text placed before the link lists.
 * @param {Array<{name: string, links: Array<{title: string, url: string, notes?: string}>}>} [ctx.sections]
 *   When empty or missing, a single "## Docs" list is built from `ctx.docs`.
 * @param {Array<{title: string, url: string}>} ctx.docs
 * @param {Array<{title: string, url: string, notes?: string}>} [ctx.optionalLinks]
 *   Rendered under "## Optional".
 * @param {(url: string) => string} ctx.linkMapper - Rewrites each link target.
 * @returns {string} File content ending with a newline.
 */
function renderLinksFile(ctx) {
  const bullet = (entry) => `- [${entry.title}](${ctx.linkMapper(entry.url)})`;
  const bulletWithNotes = (entry) => {
    const line = bullet(entry);
    return entry.notes ? `${line}: ${entry.notes}` : line;
  };

  const out = [`# ${ctx.title}`];
  if (ctx.description) out.push(`> ${ctx.description}`);
  if (ctx.version) out.push("", `Version: ${ctx.version}`);
  if (ctx.rootContent) out.push("", ctx.rootContent.trim(), "");

  if (ctx.sections?.length) {
    for (const section of ctx.sections) {
      out.push("", `## ${section.name}`);
      for (const entry of section.links) out.push(bulletWithNotes(entry));
    }
  } else {
    // No explicit sections: list every document under one heading.
    out.push("", "## Docs");
    for (const doc of ctx.docs) out.push(bullet(doc));
  }

  if (ctx.optionalLinks?.length) {
    out.push("", "## Optional");
    for (const entry of ctx.optionalLinks) out.push(bulletWithNotes(entry));
  }

  out.push("", "Generated By: LLMOPTIMIZER BY Huzaifa Shoukat");
  return `${out.join("\n")}\n`;
}
/**
 * Groups documents into named sections based on keywords found in their
 * lowercased `relPath`. Rules are tried in order and the first hit wins;
 * documents matching no rule go to "Docs".
 *
 * @param {Array<{relPath: string, title: string, url: string}>} docs
 * @returns {Array<{name: string, links: Array<{title: string, url: string}>}>|undefined}
 *   Non-empty sections in rule order, or `undefined` when there is nothing to
 *   group.
 */
function autoSectionsFrom(docs) {
  if (!docs.length) return void 0;

  // [section name, path fragments that select it] — order defines priority.
  const rules = [
    ["Getting Started", ["getting-started", "quick-start", "quickstart"]],
    ["Guides", ["/guide", "guides"]],
    ["API", ["/api", "reference/api"]],
    ["Tutorials", ["tutorial"]],
    ["Reference", ["reference"]]
  ];
  const fallback = "Docs";

  const buckets = new Map();
  for (const [name] of rules) buckets.set(name, []);
  buckets.set(fallback, []);

  for (const doc of docs) {
    const lowered = doc.relPath.toLowerCase();
    const hit = rules.find(([, fragments]) => fragments.some((fragment) => lowered.includes(fragment)));
    buckets.get(hit ? hit[0] : fallback).push({ title: doc.title, url: doc.url });
  }

  const sections = [];
  for (const [name, links] of buckets) {
    if (links.length) sections.push({ name, links });
  }
  return sections.length ? sections : void 0;
}
/**
 * Concatenates the full text of the documents behind a list of URLs.
 *
 * Each document contributes a `# title` heading, an optional block-quoted
 * description and its trimmed content. URLs without an entry in `byUrl` are
 * skipped.
 *
 * @param {Map<string, {title: string, description?: string, content: string}>} byUrl
 * @param {string[]} urls - URLs in output order.
 * @returns {string} The joined text ("" when nothing matched).
 */
function renderCtx(byUrl, urls) {
  const chunks = [];
  urls
    .map((url) => byUrl.get(url))
    .filter(Boolean)
    .forEach((doc) => {
      chunks.push(`# ${doc.title}\n`);
      if (doc.description) chunks.push(`> ${doc.description}\n`);
      chunks.push(doc.content.trim(), "");
    });
  return chunks.join("\n");
}
/**
 * Renders the full-content file (llms-full.txt style): the same header as the
 * link index, followed by every document's heading, optional description and
 * content.
 *
 * @param {object} ctx
 * @param {string} ctx.title
 * @param {string} [ctx.description] - Emitted as a block quote.
 * @param {string} [ctx.version]
 * @param {string} [ctx.rootContent] - Free text placed before the documents.
 * @param {Array<{title: string, description?: string, content: string}>} ctx.docs
 * @returns {string} File content ending with a newline.
 */
function renderFullFile(ctx) {
  const out = [`# ${ctx.title}`];
  if (ctx.description) out.push(`> ${ctx.description}`);
  if (ctx.version) out.push("", `Version: ${ctx.version}`);
  if (ctx.rootContent) out.push("", ctx.rootContent.trim(), "");
  out.push("");

  ctx.docs.forEach((doc) => {
    out.push(`\n# ${doc.title}\n`);
    if (doc.description) out.push(`> ${doc.description}\n`);
    out.push(doc.content, "");
  });

  out.push("", "Generated By: LLMOPTIMIZER BY Huzaifa Shoukat");
  return `${out.join("\n")}\n`;
}
/**
 * Builds the Markdown file name for a document from its slugified title.
 *
 * @param {{title?: string}} d - Document.
 * @returns {string} `<slug>.md`, with "doc" standing in for a missing title
 *   or a title that slugifies to an empty string.
 */
function fileNameForDoc(d) {
  const slug = slugify(d.title || "doc");
  return `${slug || "doc"}.md`;
}
/**
 * Reserves a file name that is not yet in `used`.
 *
 * The name is returned unchanged when free; otherwise `-1`, `-2`, ... is
 * inserted before the extension until a free name is found. Names without an
 * extension simply get the counter appended.
 *
 * @param {string} name - Preferred file name.
 * @param {Set<string>} used - Names already taken; the returned name is added.
 * @returns {string} The reserved name.
 */
function uniqueFileName(name, used) {
  if (!used.has(name)) {
    used.add(name);
    return name;
  }
  // Split at the last dot only when something follows it; otherwise the name
  // has no extension and must not end up with an ".undefined" suffix.
  const dot = name.lastIndexOf(".");
  const hasExt = dot !== -1 && dot < name.length - 1;
  const base = hasExt ? name.slice(0, dot) : name;
  const ext = hasExt ? name.slice(dot) : "";
  let i = 1;
  while (used.has(`${base}-${i}${ext}`)) i++;
  const candidate = `${base}-${i}${ext}`;
  used.add(candidate);
  return candidate;
}
/**
 * Renders one document as a standalone Markdown file: a `# title` heading, an
 * optional block-quoted description, then the content.
 *
 * @param {{title: string, description?: string, content: string}} d
 * @returns {string} Markdown text ending with a newline.
 */
function asGeneratedDoc(d) {
  const heading = `# ${d.title}`;
  // The description block carries its own surrounding blank lines.
  const head = d.description ? [heading, `\n> ${d.description}\n`] : [heading];
  return [...head, d.content].join("\n") + "\n";
}
// src/integrations/docs.ts
/**
 * Creates the "llmoptimizer-docs" plugin object. Its `postBuild` hook scans
 * the docs (and optionally blog) directory for Markdown/MDX files and writes
 * LLM-oriented artifacts into the build output directory.
 *
 * Depending on the options it writes: per-document Markdown files, the link
 * index (`llmsTxtFilename`), the full-content file (`llmsFullTxtFilename`),
 * any `customLLMFiles`, a JSON stats file (`statsOutFile`) and the two
 * context files (`ctxOutFile`, `ctxFullOutFile`).
 *
 * @param {object} [options] - User options; unset values fall back to the
 *   defaults resolved by `withDefaults`.
 * @returns {{name: string, postBuild: (props: {outDir: string, siteConfig: object}) => Promise<void>}}
 */
function docsLLMs(options = {}) {
  const opts = withDefaults(options);
  return {
    name: "llmoptimizer-docs",
    async postBuild(props) {
      const root = process.cwd();
      const docsDir = import_node_path2.default.resolve(root, opts.docsDir);
      const blogDir = opts.includeBlog ? import_node_path2.default.resolve(root, opts.blogDir) : void 0;

      // Collect source files; underscore-prefixed files are partials and are
      // only pulled in through inlineLocalPartials.
      const allFiles = [];
      const patterns = ["**/*.md", "**/*.mdx"];
      const ig = ["**/_*.md", "**/_*.mdx", ...opts.ignoreFiles || []];
      const docsFiles = await (0, import_globby.globby)(patterns, { cwd: docsDir, ignore: ig, absolute: true });
      allFiles.push(...docsFiles);
      if (blogDir) {
        const blogFiles = await (0, import_globby.globby)(patterns, { cwd: blogDir, ignore: ig, absolute: true });
        allFiles.push(...blogFiles);
      }

      // Read, clean and describe every document.
      const docs = [];
      const importCache = /* @__PURE__ */ new Map();
      for (const abs of allFiles) {
        const rel = import_node_path2.default.relative(root, abs);
        let raw = await import_promises2.default.readFile(abs, "utf8");
        raw = await inlineLocalPartials(raw, import_node_path2.default.dirname(abs), importCache);
        const cleaned = cleanContent(raw, { excludeImports: opts.excludeImports, removeDuplicateHeadings: opts.removeDuplicateHeadings });
        const meta = extractFrontmatterAndTitle(cleaned);
        const slugPath = toUrlPath(abs, { root, docsDir, blogDir, ignorePaths: opts.pathTransformation.ignorePaths, addPaths: opts.pathTransformation.addPaths });
        const url = toSiteUrl(props.siteConfig, slugPath);
        docs.push({ absPath: abs, relPath: rel, url, title: meta.title, description: meta.description, content: meta.content });
      }
      const ordered = orderDocs(docs, opts.includeOrder, opts.includeUnmatchedLast);

      // Every artifact below is written into outDir, so make sure it exists
      // regardless of which outputs are enabled.
      await import_promises2.default.mkdir(props.outDir, { recursive: true });

      // Optional per-document Markdown files; mdMap lets the link index point
      // at them instead of the site URLs.
      const mdMap = /* @__PURE__ */ new Map();
      if (opts.generateMarkdownFiles) {
        const used = /* @__PURE__ */ new Set();
        for (const d of ordered) {
          const fname = uniqueFileName(fileNameForDoc(d), used);
          const outFile = import_node_path2.default.join(props.outDir, fname);
          await import_promises2.default.writeFile(outFile, asGeneratedDoc(d));
          mdMap.set(d.url, "/" + fname);
        }
      }

      // Derive sections automatically only when the user supplied none AND
      // the `autoSections` option (default true) has not been switched off.
      const computedSections = !opts.sections && opts.autoSections ? autoSectionsFrom(ordered) : void 0;

      if (opts.generateLLMsTxt) {
        const txt = renderLinksFile({
          title: opts.title || props.siteConfig.title || "Documentation",
          description: opts.description || props.siteConfig.tagline || "LLM-friendly documentation index",
          version: opts.version,
          rootContent: opts.rootContent,
          docs: ordered,
          linkMapper: (u) => mdMap.get(u) || u,
          sections: opts.sections ?? computedSections,
          optionalLinks: opts.optionalLinks
        });
        const out = import_node_path2.default.join(props.outDir, opts.llmsTxtFilename);
        await import_promises2.default.writeFile(out, txt);
      }

      if (opts.generateLLMsFullTxt) {
        const txt = renderFullFile({
          title: opts.title || props.siteConfig.title || "Documentation",
          description: opts.description || props.siteConfig.tagline || "LLM-friendly documentation",
          version: opts.version,
          rootContent: opts.fullRootContent,
          docs: ordered
        });
        const out = import_node_path2.default.join(props.outDir, opts.llmsFullTxtFilename);
        await import_promises2.default.writeFile(out, txt);
      }

      // User-defined extra files, each with its own filter and ordering.
      if (opts.customLLMFiles?.length) {
        for (const cfg of opts.customLLMFiles) {
          const subset = filterDocs(docs, cfg.includePatterns, cfg.ignorePatterns);
          const ord = orderDocs(subset, cfg.orderPatterns, cfg.includeUnmatchedLast ?? false);
          const title = cfg.title || opts.title || props.siteConfig.title || "Documentation";
          const description = cfg.description || opts.description || props.siteConfig.tagline || "";
          if (cfg.fullContent) {
            const txt = renderFullFile({ title, description, version: cfg.version ?? opts.version, rootContent: cfg.rootContent ?? opts.fullRootContent, docs: ord });
            await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, cfg.filename), txt);
          } else {
            const txt = renderLinksFile({ title, description, version: cfg.version ?? opts.version, rootContent: cfg.rootContent ?? opts.rootContent, docs: ord, linkMapper: (u) => mdMap.get(u) || u });
            await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, cfg.filename), txt);
          }
        }
      }

      // Stats are best effort: a failure must not break the build, but it is
      // reported instead of being swallowed silently.
      try {
        const perDoc = ordered.map((d) => ({
          url: d.url,
          title: d.title,
          headings: headingsFrom(d.content).length,
          words: wordCount(d.content),
          tokens: tokenEstimate(d.content)
        }));
        const totals = perDoc.reduce((acc, x) => ({ headings: acc.headings + x.headings, words: acc.words + x.words, tokens: acc.tokens + x.tokens }), { headings: 0, words: 0, tokens: 0 });
        const stats = { totalDocs: ordered.length, totals, perDoc };
        await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.statsOutFile), JSON.stringify(stats, null, 2));
      } catch (err) {
        console.warn(`[llmoptimizer-docs] Could not write ${opts.statsOutFile}: ${err?.message ?? err}`);
      }

      // Context files: the core one covers the section links, the full one
      // additionally covers the optional links.
      if (opts.emitCtx) {
        const byUrl = /* @__PURE__ */ new Map();
        for (const d of ordered) byUrl.set(d.url, d);
        const sections = opts.sections ?? computedSections;
        const coreLinks = sections ? sections.flatMap((s) => s.links.map((l) => l.url)) : ordered.map((d) => d.url);
        const optionalLinks = (opts.optionalLinks || []).map((l) => l.url);
        const fullLinks = optionalLinks.length ? [...coreLinks, ...optionalLinks] : coreLinks;
        const ctxCore = renderCtx(byUrl, coreLinks);
        const ctxFull = renderCtx(byUrl, fullLinks);
        await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.ctxOutFile), ctxCore);
        await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.ctxFullOutFile), ctxFull);
      }
    }
  };
}
/**
 * Resolves user options against the plugin defaults.
 *
 * A default is applied when the option is `null` or `undefined`. `title`,
 * `description`, `version`, `rootContent`, `fullRootContent`, `sections` and
 * `optionalLinks` have no default and are passed through unchanged.
 *
 * @param {object} o - User-supplied options.
 * @returns {object} Complete options object.
 */
function withDefaults(o) {
  // Nullish fallback for a single option key.
  const pick = (key, fallback) => o[key] ?? fallback;
  const transform = o.pathTransformation;
  return {
    generateLLMsTxt: pick("generateLLMsTxt", true),
    generateLLMsFullTxt: pick("generateLLMsFullTxt", true),
    generateMarkdownFiles: pick("generateMarkdownFiles", false),
    docsDir: pick("docsDir", "docs"),
    includeBlog: pick("includeBlog", false),
    blogDir: pick("blogDir", "blog"),
    ignoreFiles: pick("ignoreFiles", []),
    includeOrder: pick("includeOrder", []),
    includeUnmatchedLast: pick("includeUnmatchedLast", true),
    pathTransformation: {
      ignorePaths: transform?.ignorePaths ?? [],
      addPaths: transform?.addPaths ?? []
    },
    excludeImports: pick("excludeImports", false),
    removeDuplicateHeadings: pick("removeDuplicateHeadings", false),
    title: o.title,
    description: o.description,
    version: o.version,
    rootContent: o.rootContent,
    fullRootContent: o.fullRootContent,
    customLLMFiles: pick("customLLMFiles", []),
    llmsTxtFilename: pick("llmsTxtFilename", "llms.txt"),
    llmsFullTxtFilename: pick("llmsFullTxtFilename", "llms-full.txt"),
    statsOutFile: pick("statsOutFile", "llms-stats.json"),
    sections: o.sections,
    optionalLinks: o.optionalLinks,
    autoSections: pick("autoSections", true),
    emitCtx: pick("emitCtx", false),
    ctxOutFile: pick("ctxOutFile", "llms-ctx.txt"),
    ctxFullOutFile: pick("ctxFullOutFile", "llms-ctx-full.txt")
  };
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
docsLLMs
});
//# sourceMappingURL=docs.cjs.map