UNPKG

llmoptimizer

Version:

Generate an llms.txt summary of your website/docs for LLMs (framework-agnostic with Vite/Next/Nuxt/Astro/Remix helpers).

github.com/ihuzaifashoukat/llmoptimizer

ihuzaifashoukat/llmoptimizer

442 lines (439 loc) • 18.4 kB

JavaScript

"use strict"; var __create = Object.create; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getProtoOf = Object.getPrototypeOf; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // If the importer is in node compatibility mode or this is not an ESM // file that has been converted to a CommonJS file using a Babel- // compatible transform (i.e. "__esModule" has not been set), then set // "default" to the CommonJS "module.exports" for node compatibility. isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target, mod )); var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // src/integrations/docs.ts var docs_exports = {}; __export(docs_exports, { docsLLMs: () => docsLLMs }); module.exports = __toCommonJS(docs_exports); var import_node_path2 = __toESM(require("path"), 1); var import_promises2 = __toESM(require("fs/promises"), 1); var import_globby = require("globby"); // src/integrations/docs-helpers.ts var import_node_path = __toESM(require("path"), 1); var import_promises = __toESM(require("fs/promises"), 1); async function inlineLocalPartials(raw, baseDir, cache) { const importRegex = /^\s*import\s+([A-Za-z0-9_]+)\s+from\s+['"](\.\/[^'"\n]+)['"];?\s*$/gm; const imports = []; const replaced = raw.replace(importRegex, (line) => { const mm = /^\s*import\s+([A-Za-z0-9_]+)\s+from\s+['"](\.\/[^'"\n]+)['"];?\s*$/.exec(line); if (!mm) return ""; const name = mm[1]; const rel = mm[2]; if (!/\/_[^/]+\.(md|mdx)$/i.test(rel)) return ""; const abs = import_node_path.default.resolve(baseDir, rel); imports.push({ name, abs, content: "" }); return ""; }); for (const imp of imports) { if (!cache.has(imp.abs)) { try { let txt = await import_promises.default.readFile(imp.abs, "utf8"); txt = await inlineLocalPartials(txt, import_node_path.default.dirname(imp.abs), cache); cache.set(imp.abs, txt); } catch { cache.set(imp.abs, ""); } } imp.content = cache.get(imp.abs) || ""; } let out = replaced; for (const imp of imports) { const usageRe = new RegExp(`<${imp.name}\\s*/>`, "g"); out = out.replace(usageRe, imp.content); } return out; } function cleanContent(raw, opts) { let s = raw; if (opts.excludeImports) { s = s.replace(/^\s*import\s+[^\n]+\n/gm, "").replace(/^\s*export\s+(const|let|var|default)\s+[^\n]*\n/gm, ""); } if (opts.removeDuplicateHeadings) { const lines = s.split(/\r?\n/); const out = []; let lastHeading = null; for (const ln of lines) { const h = ln.match(/^\s*#\s+(.+)$/); if (h) { lastHeading = h[1].trim(); out.push(ln); continue; } if (lastHeading && ln.trim() === lastHeading) continue; out.push(ln); if (ln.trim()) lastHeading = null; } s = out.join("\n"); } return s; } function extractFrontmatterAndTitle(s) { let title = ""; let description; let body = s; const fm = s.match(/^---\n([\s\S]*?)\n---\n?/); if (fm) { body = s.slice(fm[0].length); const block = fm[1]; const t = block.match(/^\s*title:\s*(.+)\s*$/m); const d = block.match(/^\s*description:\s*(.+)\s*$/m); if (t) title = stripQuotes(t[1].trim()); if (d) description = stripQuotes(d[1].trim()); } if (!title) { const h1 = body.match(/^\s*#\s+(.+)$/m); if (h1) title = h1[1].trim(); } if (!title) title = "Untitled"; return { title, description, content: body.trim() }; } function stripQuotes(s) { if (s.startsWith('"') && s.endsWith('"') || s.startsWith("'") && s.endsWith("'")) return s.slice(1, -1); return s; } function toUrlPath(abs, ctx) { const relFromRoot = import_node_path.default.relative(ctx.root, abs).replace(/\\/g, "/"); let segs = relFromRoot.split("/"); const drops = [ctx.docsDir.replace(/\\/g, "/"), ctx.blogDir?.replace(/\\/g, "/")].filter(Boolean); if (drops.length) { while (drops.includes(segs[0])) segs.shift(); } if (ctx.ignorePaths?.length) segs = segs.filter((s) => !ctx.ignorePaths.includes(s)); if (ctx.addPaths?.length) segs = [...ctx.addPaths, ...segs]; const last = segs.pop() || ""; const base = last.replace(/\.(md|mdx)$/i, ""); segs.push(base); return "/" + segs.filter(Boolean).join("/"); } function toSiteUrl(cfg, slugPath) { const base = (cfg.url ? cfg.url.replace(/\/$/, "") : "") + (cfg.baseUrl || ""); return base ? base.replace(/\/$/, "") + slugPath : slugPath; } function globToRegex(g) { const esc = g.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*\*/g, "::DOUBLESTAR::").replace(/\*/g, "[^/]*").replace(/::DOUBLESTAR::/g, ".*"); return new RegExp("^" + esc + "$"); } function orderDocs(docs, includeOrder, includeUnmatchedLast) { if (!includeOrder || !includeOrder.length) return docs.slice(); const matched = []; const rest = new Set(docs); for (const pat of includeOrder) { const re = globToRegex(pat); for (const d of docs) { if (rest.has(d) && (re.test(d.relPath) || re.test(d.url))) { matched.push(d); rest.delete(d); } } } if (includeUnmatchedLast) matched.push(...Array.from(rest)); return matched; } function filterDocs(docs, includePatterns, ignorePatterns) { const inc = includePatterns.map(globToRegex); const ig = (ignorePatterns || []).map(globToRegex); return docs.filter((d) => inc.some((r) => r.test(d.relPath) || r.test(d.url))).filter((d) => !ig.some((r) => r.test(d.relPath) || r.test(d.url))); } function slugify(s) { return s.toLowerCase().trim().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-"); } function headingsFrom(content) { const lines = content.split(/\r?\n/); const hs = []; for (const ln of lines) { const m = ln.match(/^\s*#{1,4}\s+(.+)$/); if (m) hs.push(m[1].trim()); } return hs; } function wordCount(content) { const noCode = content.replace(/```[\s\S]*?```/g, " "); const plain = noCode.replace(/[`*_>#\-\[\]\(\)!]/g, " "); const words = plain.split(/\s+/).filter(Boolean); return words.length; } function tokenEstimate(content) { return Math.max(0, Math.round(wordCount(content) * 1.3)); } function renderLinksFile(ctx) { const lines = []; lines.push(`# ${ctx.title}`); if (ctx.description) lines.push(`> ${ctx.description}`); if (ctx.version) lines.push("", `Version: ${ctx.version}`); if (ctx.rootContent) { lines.push("", ctx.rootContent.trim(), ""); } if (ctx.sections && ctx.sections.length) { for (const sec of ctx.sections) { lines.push("", `## ${sec.name}`); for (const link of sec.links) { const url = ctx.linkMapper(link.url); const base = `- [${link.title}](${url})`; lines.push(link.notes ? `${base}: ${link.notes}` : base); } } } else { lines.push("", "## Docs"); for (const d of ctx.docs) { const url = ctx.linkMapper(d.url); lines.push(`- [${d.title}](${url})`); } } if (ctx.optionalLinks && ctx.optionalLinks.length) { lines.push("", "## Optional"); for (const link of ctx.optionalLinks) { const url = ctx.linkMapper(link.url); const base = `- [${link.title}](${url})`; lines.push(link.notes ? `${base}: ${link.notes}` : base); } } lines.push("", "Generated By: LLMOPTIMIZER BY Huzaifa Shoukat"); return lines.join("\n") + "\n"; } function autoSectionsFrom(docs) { if (!docs.length) return void 0; const groups = { "Getting Started": [], Guides: [], API: [], Tutorials: [], Reference: [], Docs: [] }; for (const d of docs) { const p = d.relPath.toLowerCase(); if (p.includes("getting-started") || p.includes("quick-start") || p.includes("quickstart")) groups["Getting Started"].push({ title: d.title, url: d.url }); else if (p.includes("/guide") || p.includes("guides")) groups["Guides"].push({ title: d.title, url: d.url }); else if (p.includes("/api") || p.includes("reference/api")) groups["API"].push({ title: d.title, url: d.url }); else if (p.includes("tutorial")) groups["Tutorials"].push({ title: d.title, url: d.url }); else if (p.includes("reference")) groups["Reference"].push({ title: d.title, url: d.url }); else groups["Docs"].push({ title: d.title, url: d.url }); } const sections = Object.entries(groups).filter(([, links]) => links.length).map(([name, links]) => ({ name, links })); return sections.length ? sections : void 0; } function renderCtx(byUrl, urls) { const lines = []; for (const u of urls) { const d = byUrl.get(u); if (!d) continue; lines.push(`# ${d.title} `); if (d.description) lines.push(`> ${d.description} `); lines.push(d.content.trim(), ""); } return lines.join("\n"); } function renderFullFile(ctx) { const lines = []; lines.push(`# ${ctx.title}`); if (ctx.description) lines.push(`> ${ctx.description}`); if (ctx.version) lines.push("", `Version: ${ctx.version}`); if (ctx.rootContent) { lines.push("", ctx.rootContent.trim(), ""); } lines.push(""); for (const d of ctx.docs) { lines.push(` # ${d.title} `); if (d.description) lines.push(`> ${d.description} `); lines.push(d.content, ""); } lines.push("", "Generated By: LLMOPTIMIZER BY Huzaifa Shoukat"); return lines.join("\n") + "\n"; } function fileNameForDoc(d) { const base = slugify(d.title || "doc") || "doc"; return base + ".md"; } function uniqueFileName(name, used) { if (!used.has(name)) { used.add(name); return name; } const [base, ext] = name.split(/\.(?=[^.]+$)/); let i = 1; while (used.has(`${base}-${i}.${ext}`)) i++; const nn = `${base}-${i}.${ext}`; used.add(nn); return nn; } function asGeneratedDoc(d) { const lines = []; lines.push(`# ${d.title}`); if (d.description) lines.push(` > ${d.description} `); lines.push(d.content); return lines.join("\n") + "\n"; } // src/integrations/docs.ts function docsLLMs(options = {}) { const opts = withDefaults(options); return { name: "llmoptimizer-docs", async postBuild(props) { const root = process.cwd(); const docsDir = import_node_path2.default.resolve(root, opts.docsDir); const blogDir = opts.includeBlog ? import_node_path2.default.resolve(root, opts.blogDir) : void 0; const allFiles = []; const patterns = ["**/*.md", "**/*.mdx"]; const ig = ["**/_*.md", "**/_*.mdx", ...opts.ignoreFiles || []]; const docsFiles = await (0, import_globby.globby)(patterns, { cwd: docsDir, ignore: ig, absolute: true }); allFiles.push(...docsFiles); if (blogDir) { const blogFiles = await (0, import_globby.globby)(patterns, { cwd: blogDir, ignore: ig, absolute: true }); allFiles.push(...blogFiles); } const docs = []; const importCache = /* @__PURE__ */ new Map(); for (const abs of allFiles) { const rel = import_node_path2.default.relative(root, abs); let raw = await import_promises2.default.readFile(abs, "utf8"); raw = await inlineLocalPartials(raw, import_node_path2.default.dirname(abs), importCache); const cleaned = cleanContent(raw, { excludeImports: opts.excludeImports, removeDuplicateHeadings: opts.removeDuplicateHeadings }); const meta = extractFrontmatterAndTitle(cleaned); const slugPath = toUrlPath(abs, { root, docsDir, blogDir, ignorePaths: opts.pathTransformation.ignorePaths, addPaths: opts.pathTransformation.addPaths }); const url = toSiteUrl(props.siteConfig, slugPath); docs.push({ absPath: abs, relPath: rel, url, title: meta.title, description: meta.description, content: meta.content }); } const ordered = orderDocs(docs, opts.includeOrder, opts.includeUnmatchedLast); const mdMap = /* @__PURE__ */ new Map(); if (opts.generateMarkdownFiles) { await import_promises2.default.mkdir(props.outDir, { recursive: true }); const used = /* @__PURE__ */ new Set(); for (const d of ordered) { const fname = uniqueFileName(fileNameForDoc(d), used); const outFile = import_node_path2.default.join(props.outDir, fname); await import_promises2.default.writeFile(outFile, asGeneratedDoc(d)); mdMap.set(d.url, "/" + fname); } } const computedSections = !opts.sections ? autoSectionsFrom(ordered) : void 0; if (opts.generateLLMsTxt) { const txt = renderLinksFile({ title: opts.title || props.siteConfig.title || "Documentation", description: opts.description || props.siteConfig.tagline || "LLM-friendly documentation index", version: opts.version, rootContent: opts.rootContent, docs: ordered, linkMapper: (u) => mdMap.get(u) || u, sections: opts.sections ?? computedSections, optionalLinks: opts.optionalLinks }); const out = import_node_path2.default.join(props.outDir, opts.llmsTxtFilename); await import_promises2.default.writeFile(out, txt); } if (opts.generateLLMsFullTxt) { const txt = renderFullFile({ title: opts.title || props.siteConfig.title || "Documentation", description: opts.description || props.siteConfig.tagline || "LLM-friendly documentation", version: opts.version, rootContent: opts.fullRootContent, docs: ordered }); const out = import_node_path2.default.join(props.outDir, opts.llmsFullTxtFilename); await import_promises2.default.writeFile(out, txt); } if (opts.customLLMFiles?.length) { for (const cfg of opts.customLLMFiles) { const subset = filterDocs(docs, cfg.includePatterns, cfg.ignorePatterns); const ord = orderDocs(subset, cfg.orderPatterns, cfg.includeUnmatchedLast ?? false); const title = cfg.title || opts.title || props.siteConfig.title || "Documentation"; const description = cfg.description || opts.description || props.siteConfig.tagline || ""; if (cfg.fullContent) { const txt = renderFullFile({ title, description, version: cfg.version ?? opts.version, rootContent: cfg.rootContent ?? opts.fullRootContent, docs: ord }); await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, cfg.filename), txt); } else { const txt = renderLinksFile({ title, description, version: cfg.version ?? opts.version, rootContent: cfg.rootContent ?? opts.rootContent, docs: ord, linkMapper: (u) => mdMap.get(u) || u }); await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, cfg.filename), txt); } } } try { const perDoc = ordered.map((d) => ({ url: d.url, title: d.title, headings: headingsFrom(d.content).length, words: wordCount(d.content), tokens: tokenEstimate(d.content) })); const totals = perDoc.reduce((acc, x) => ({ headings: acc.headings + x.headings, words: acc.words + x.words, tokens: acc.tokens + x.tokens }), { headings: 0, words: 0, tokens: 0 }); const stats = { totalDocs: ordered.length, totals, perDoc }; await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.statsOutFile), JSON.stringify(stats, null, 2)); } catch { } if (opts.emitCtx) { const byUrl = /* @__PURE__ */ new Map(); for (const d of ordered) byUrl.set(d.url, d); const sections = opts.sections ?? computedSections; const coreLinks = sections ? sections.flatMap((s) => s.links.map((l) => l.url)) : ordered.map((d) => d.url); const optionalLinks = (opts.optionalLinks || []).map((l) => l.url); const fullLinks = optionalLinks.length ? [...coreLinks, ...optionalLinks] : coreLinks; const ctxCore = renderCtx(byUrl, coreLinks); const ctxFull = renderCtx(byUrl, fullLinks); await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.ctxOutFile), ctxCore); await import_promises2.default.writeFile(import_node_path2.default.join(props.outDir, opts.ctxFullOutFile), ctxFull); } } }; } function withDefaults(o) { return { generateLLMsTxt: o.generateLLMsTxt ?? true, generateLLMsFullTxt: o.generateLLMsFullTxt ?? true, generateMarkdownFiles: o.generateMarkdownFiles ?? false, docsDir: o.docsDir ?? "docs", includeBlog: o.includeBlog ?? false, blogDir: o.blogDir ?? "blog", ignoreFiles: o.ignoreFiles ?? [], includeOrder: o.includeOrder ?? [], includeUnmatchedLast: o.includeUnmatchedLast ?? true, pathTransformation: { ignorePaths: o.pathTransformation?.ignorePaths ?? [], addPaths: o.pathTransformation?.addPaths ?? [] }, excludeImports: o.excludeImports ?? false, removeDuplicateHeadings: o.removeDuplicateHeadings ?? false, title: o.title, description: o.description, version: o.version, rootContent: o.rootContent, fullRootContent: o.fullRootContent, customLLMFiles: o.customLLMFiles ?? [], llmsTxtFilename: o.llmsTxtFilename ?? "llms.txt", llmsFullTxtFilename: o.llmsFullTxtFilename ?? "llms-full.txt", statsOutFile: o.statsOutFile ?? "llms-stats.json", sections: o.sections, optionalLinks: o.optionalLinks, autoSections: o.autoSections ?? true, emitCtx: o.emitCtx ?? false, ctxOutFile: o.ctxOutFile ?? "llms-ctx.txt", ctxFullOutFile: o.ctxFullOutFile ?? "llms-ctx-full.txt" }; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { docsLLMs }); //# sourceMappingURL=docs.cjs.map