// codecanon: a CLI tool that downloads documentation for third-party libraries,
// converts it to Markdown, and optimizes it for LLMs.
import { promises as fs } from "fs";
import { join } from "path";
import { getWorkspacePaths, isWorkspaceInitialized, loadWorkspaceConfig, saveWorkspaceConfig, } from "./workspace.js";
/**
 * Add a package's documentation to the local workspace
 */
export async function addPackage(options) {
    // Ensure workspace is initialized
    if (!(await isWorkspaceInitialized(options.cwd))) {
        throw new Error("No CodeCanon workspace found. Run 'canon init' first.");
    }
    const config = await loadWorkspaceConfig(options.cwd);
    const paths = getWorkspacePaths(options.cwd);
    // Check if package already exists
    const existingPackage = config.packages.find((p) => p.name === options.name);
    if (existingPackage && !options.force) {
        throw new Error(`Package '${options.name}' already exists. Use --force to re-fetch.`);
    }
    // Create package cache directory
    const packageVersion = options.version || "latest";
    const packageCachePath = join(paths.cache, options.name, packageVersion);
    await fs.mkdir(packageCachePath, { recursive: true });
    await fs.mkdir(join(packageCachePath, "raw"), { recursive: true });
    await fs.mkdir(join(packageCachePath, "chunks"), { recursive: true });
    // TODO: Implement actual fetching logic
    // For now, create placeholder files
    const documentsCount = await fetchPackageDocumentation(options, packageCachePath);
    const chunksCount = options.enableChunking
        ? await processDocumentationChunks(packageCachePath)
        : documentsCount;
    // Update package info
    const packageInfo = {
        name: options.name,
        version: packageVersion,
        source: options.source,
        addedAt: new Date().toISOString(),
        documentsCount,
        chunksCount,
    };
    // Update config
    if (existingPackage) {
        const index = config.packages.findIndex((p) => p.name === options.name);
        config.packages[index] = packageInfo;
    }
    else {
        config.packages.push(packageInfo);
    }
    await saveWorkspaceConfig(options.cwd, config);
    // Update llms.txt
    await updateLlmsTxt(options.cwd, config);
    return {
        documentsCount,
        chunksCount,
    };
}
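// Illustrative usage only (not part of the module): a CLI command handler might
// call addPackage roughly like this. The option names mirror the fields read
// above (cwd, name, version, source, force, enableChunking); "npm" as a source
// value is an assumption, not something this file enforces.
//
//   const result = await addPackage({
//       cwd: process.cwd(),
//       name: "react",
//       version: "latest",
//       source: "npm",
//       force: false,
//       enableChunking: true,
//   });
//   console.log(`Added ${result.documentsCount} docs, ${result.chunksCount} chunks`);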
/**
 * Fetch documentation for a package (placeholder implementation)
 */
async function fetchPackageDocumentation(options, packageCachePath) {
    // TODO: Implement actual NPM registry lookup and documentation fetching
    // This is a placeholder that creates a dummy file
    const indexPath = join(packageCachePath, "index.json");
    const metadata = {
        name: options.name,
        version: options.version || "latest",
        source: options.source,
        fetchedAt: new Date().toISOString(),
        // TODO: Add actual documentation URLs and metadata
        documentationUrl: `https://npmjs.com/package/${options.name}`,
        files: [],
    };
    await fs.writeFile(indexPath, JSON.stringify(metadata, null, 2));
    // Create a placeholder README
    const readmePath = join(packageCachePath, "raw", "README.md");
    const placeholderContent = `# ${options.name}
This is a placeholder for the ${options.name} documentation.
The actual fetching implementation will be added later.
Package: ${options.name}
Version: ${options.version || "latest"}
Source: ${options.source}
Fetched: ${new Date().toISOString()}
`;
    await fs.writeFile(readmePath, placeholderContent);
    return 1; // Return 1 document for now
}
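// A minimal sketch of one way the real fetch could work (hypothetical helper,
// not called anywhere in this module): the public npm registry serves package
// metadata, including the README text, at https://registry.npmjs.org/<name>.
// Assumes Node 18+ so the global fetch API is available.
async function fetchReadmeFromNpmRegistry(name, version) {
    const response = await fetch(`https://registry.npmjs.org/${name}`);
    if (!response.ok) {
        throw new Error(`Registry lookup failed for '${name}' (HTTP ${response.status})`);
    }
    const metadata = await response.json();
    // The registry document exposes a "dist-tags" map; fall back to the "latest"
    // tag when no explicit version is requested.
    const resolvedVersion = version && version !== "latest"
        ? version
        : metadata["dist-tags"]?.latest;
    return {
        version: resolvedVersion,
        readme: metadata.readme || "",
    };
}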
/**
 * Process documentation into semantic chunks (placeholder implementation)
 */
async function processDocumentationChunks(packageCachePath) {
    // TODO: Implement semantic chunking logic
    // For now, just copy the raw file to chunks directory
    const rawPath = join(packageCachePath, "raw", "README.md");
    const chunkPath = join(packageCachePath, "chunks", "README.chunk.md");
    try {
        const content = await fs.readFile(rawPath, "utf-8");
        await fs.writeFile(chunkPath, content);
        return 1; // Return 1 chunk for now
    }
    catch (error) {
        console.warn("Warning: Could not process chunks, using raw file");
        return 1;
    }
}
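// A sketch of what heading-based chunking could look like (hypothetical helper,
// not called anywhere in this module): split the Markdown on level-1 and
// level-2 headings so each chunk stays a self-contained topic.
function splitMarkdownByHeadings(markdown) {
    const chunks = [];
    let current = [];
    for (const line of markdown.split("\n")) {
        // Start a new chunk whenever a "#" or "##" heading begins a line.
        if (/^#{1,2}\s/.test(line) && current.length > 0) {
            chunks.push(current.join("\n"));
            current = [];
        }
        current.push(line);
    }
    if (current.length > 0) {
        chunks.push(current.join("\n"));
    }
    return chunks;
}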
/**
 * Update the llms.txt file with current packages
 */
async function updateLlmsTxt(cwd, config) {
    const paths = getWorkspacePaths(cwd);
    let content = `# CodeCanon Context Index
# This file follows the llms.txt standard for LLM context discovery
# Generated on ${new Date().toISOString()}
`;
    if (config.packages.length === 0) {
        content +=
            "# No packages added yet. Use 'canon add <package>' to get started.\n";
    }
    else {
        content += "# Available package documentation:\n";
        for (const pkg of config.packages) {
            content += `# - ${pkg.name}@${pkg.version} (${pkg.documentsCount} docs, ${pkg.chunksCount} chunks)\n`;
        }
        content += "\n";
        // Add relative paths to chunk files
        for (const pkg of config.packages) {
            const chunkDir = join(".canon", "cache", pkg.name, pkg.version, "chunks");
            content += `${chunkDir}/\n`;
        }
    }
    await fs.writeFile(paths.llmsTxt, content);
}
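// For reference, with one package added the generated file at paths.llmsTxt
// comes out roughly like this (package name and timestamp are illustrative):
//
//   # CodeCanon Context Index
//   # This file follows the llms.txt standard for LLM context discovery
//   # Generated on 2024-01-01T00:00:00.000Z
//   # Available package documentation:
//   # - react@latest (1 docs, 1 chunks)
//
//   .canon/cache/react/latest/chunks/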