UNPKG

openai-code

Version:

An unofficial proxy layer that lets you use Anthropic Claude Code with any OpenAI API backend.

233 lines (219 loc) 8.06 kB
import { storeFileName } from './vectordb.mjs';
import { statSync, readFileSync } from 'node:fs';

/**
 * File-name endings (and a few complete file names) that are listed as
 * non-embeddable.
 *
 * NOTE(review): how this list is matched (suffix vs. exact name) is decided by
 * the consumers, which are not visible in this file — confirm there.
 * Some endings (e.g. '.zip', '.tar', '.exe') appear more than once; the
 * repeats are retained so the exported array is unchanged.
 */
export const nonEmbeddableFileEndings = [
  '.lock', // lock files (e.g., package-lock.json, yarn.lock)
  '.exe', // executable files
  '.dll', // dynamic link library files
  '.bin', // binary files
  '.env', // environment files
  '.zip', // zip files
  '.tar', // tar files
  '.gz', // gzip files
  '.bz2', // bzip2 files
  '.xz', // XZ files
  '.7z', // 7-Zip files
  '.rar', // RAR files
  '.iso', // disk image files
  '.img', // disk image files
  '.sys', // system files
  '.tmp', // temporary files
  '.log', // log files
  '.dat', // data files
  '.db', // database files
  '.bak', // backup files
  '.swp', // swap files
  '.swo', // swap files
  '.class', // Java class files
  '.o', // object files
  '.a', // archive files
  '.so', // shared object files
  '.dylib', // dynamic library files on macOS
  '.lib', // library files
  '.pdb', // program database files
  '.msi', // Windows installer package
  '.cab', // cabinet files
  '.vmdk', // virtual machine disk files
  '.vdi', // virtual disk image files
  '.vhd', // virtual hard disk files
  '.vhdx', // virtual hard disk files
  '.qcow2', // QEMU Copy-On-Write version 2 files
  '.dmg', // macOS disk image files
  '.pkg', // package files
  '.deb', // Debian package files
  '.rpm', // Red Hat package manager files
  '.tar', // tarball files
  '.gz', // gzip compressed files
  '.zip', // zip compressed files
  '.rar', // RAR compressed files
  '.7z', // 7-Zip compressed files
  '.xz', // XZ compressed files
  '.bz2', // Bzip2 compressed files
  '.z', // Z compressed files
  '.tgz', // tarball gzip compressed files
  '.tbz2', // tarball bzip2 compressed files
  '.txz', // tarball XZ compressed files
  '.lz', // Lzip compressed files
  '.lzma', // LZMA compressed files
  '.lzo', // LZO compressed files
  '.war', // Web application archive files
  '.ear', // Enterprise application archive files
  '.jar', // Java archive files
  '.apk', // Android package files
  '.ipa', // iOS application archive files
  '.app', // macOS application package
  '.pyc', // Python compiled files
  '.pyo', // Python optimized files
  '.pyd', // Python dynamic module files
  '.rbc', // Ruby compiled files
  '.rbo', // Ruby optimized files
  '.beam', // Erlang compiled files
  '.elc', // Emacs Lisp compiled files
  '.scpt', // AppleScript compiled files
  '.scptd', // AppleScript compiled script bundle
  '.exe', // Windows executable files
  '.com', // DOS command files
  '.scr', // Windows screen saver files
  '.cpl', // Windows control panel files
  '.msc', // Microsoft Management Console files
  '.gadget', // Windows gadget files
  '.svg', // Scalable Vector Graphics files
  'package-lock.json', // lock files (e.g., package-lock.json, yarn.lock)
  '.map', // source map files
  '.pem', // certificate files
  '.key', // key files
  '.csr', // certificate signing request files
  '.crt', // certificate files
  '.cer', // certificate files
  '.pfx', // PKCS #12 files
  '.p12', // PKCS #12 files
  '.p7b', // PKCS #7 files
  '.p7r', // PKCS #7 files
  '.jks', // Java KeyStore files
  '.keystore', // Java KeyStore files
  '.der', // DER encoded files
  '.crl', // certificate revocation list files
  '.sst', // Microsoft Serialized Certificate Store files
  '.csr', // certificate signing request files
  '.rsa', // RSA key files
  '.dsa', // DSA key files
  '.ec', // EC key files
  '.pub', // public key files
  '.sig', // signature files
  '.pgp', // PGP files
  '.asc', // ASCII armored files
  '.bak', // backup files
  '.old', // old files
  '.orig', // original files
  '.min.js', // minified JavaScript files
  '.min.css', // minified CSS files
  '.npmignore', // npm ignore files
  '.gitignore', // Git ignore files
  '.dockerignore', // Docker ignore files
  '.eslintignore', // ESLint ignore files
  '.prettierignore', // Prettier ignore files
  '.DS_Store', // macOS directory attribute files
  'Thumbs.db', // Windows thumbnail cache files
  'CLAUDE.md', // CLAUDE metadata files
  'CLAUDE_RULES.md', // CLAUDE rules files
  'CLAUDE_STATE.json', // CLAUDE state files
  storeFileName, // CLAUDE vector database files itself
];

/**
 * Directory names listed as ones to skip.
 *
 * NOTE(review): the traversal that consumes this list is not in this file;
 * presumably names are compared against single path segments — confirm there.
 */
export const skipDirectoryNames = [
  'node_modules', // Node.js modules
  'data', // Data files
  '.git', // Git repository
  '.claudeignore', // CLAUDE ignore files
  '.svn', // Subversion repository
  '.ssh', // SSH keys
  '.cert', // Certificate files
  '.certs', // Certificate files
  '.hg', // Mercurial repository
  '.vscode', // Visual Studio Code settings
  '.idea', // IntelliJ IDEA settings
  '.vs', // Visual Studio settings
  '.github', // GitHub settings
  '.gitlab', // GitLab settings
  '.circleci', // CircleCI settings
  '.travis', // Travis CI settings
  '.appveyor', // AppVeyor settings
  '.docker', // Docker settings
  '.vagrant', // Vagrant settings
  '.terraform', // Terraform settings
  '.vscode-test', // Visual Studio Code test settings
  'build', // Build output directory
  'dist', // Distribution output directory
  'venv', // Python virtual environment
  'target', // Rust and Java build output
  'bin', // C/C++ binary output
  'obj', // C/C++ object files
  'logs', // Log files
  'tmp', // Temporary files
  'temp', // Temporary files
  'cache', // Cache files
  'out', // Output files
  'coverage', // Code coverage reports
];

// Inclusive UTF-16 code-unit ranges accepted by isPrintable. Built once at
// module load instead of on every call, because isPrintable runs once per
// character of the analysed sample.
const PRINTABLE_CODE_RANGES = [
  [0x20, 0x7e], // ASCII printable: space, digits, letters, punctuation
  [0x4e00, 0x9fff], // CJK Unified Ideographs
  [0x3000, 0x303f], // CJK Symbols and Punctuation
  [0x3040, 0x309f], // Hiragana
  [0x30a0, 0x30ff], // Katakana
  [0xac00, 0xd7af], // Hangul Syllables
];

// Largest file size, in bytes, that isEmbeddableTextFile will read (128 kB).
const MAX_EMBEDDABLE_FILE_BYTES = 128 * 1024;
// Number of leading characters inspected for the printable-ratio heuristic.
const ANALYSIS_SAMPLE_LENGTH = 1024;
// Minimum share of printable characters in the sample for a file to pass.
const MIN_PRINTABLE_RATIO = 0.9;

/**
 * Tells whether a character counts as "printable" for the text heuristic.
 *
 * Accepted: tab, line feed, carriage return, the ASCII printable range
 * (0x20-0x7E) and the CJK / kana / Hangul ranges in PRINTABLE_CODE_RANGES.
 * Only the first UTF-16 code unit of `ch` is range-checked.
 *
 * @param {string} ch - A string, normally a single character.
 * @returns {boolean} True when the character is accepted; false otherwise
 *   (including for an empty string, whose code is NaN).
 */
export function isPrintable(ch) {
  // Whitespace controls sit below 0x20, so they are matched explicitly.
  if (ch === '\t' || ch === '\n' || ch === '\r') {
    return true;
  }

  const code = ch.charCodeAt(0);
  for (const [low, high] of PRINTABLE_CODE_RANGES) {
    if (code >= low && code <= high) {
      return true;
    }
  }
  return false;
}

/**
 * Heuristically decides whether the file at `filePath` is a text file that is
 * suitable for embedding.
 *
 * A file passes when it is a regular file of at most 128 kB, contains no NUL
 * byte, has at least two lines, and at least 90% of its first 1024 characters
 * (decoded as UTF-8) satisfy isPrintable.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} True when the file passes every check; false otherwise,
 *   including when the file cannot be stat'ed or read (the error is logged
 *   with console.error).
 */
export function isEmbeddableTextFile(filePath) {
  try {
    const stats = statSync(filePath);

    // Directories and other non-regular entries can never be embedded; bail
    // out here rather than letting readFileSync throw and log an error.
    if (!stats.isFile()) {
      return false;
    }
    if (stats.size > MAX_EMBEDDABLE_FILE_BYTES) {
      return false;
    }

    const buffer = readFileSync(filePath);

    // Quick binary check: a NUL byte almost never occurs in text files.
    if (buffer.includes(0)) {
      return false;
    }

    const text = buffer.toString('utf8');

    // Single-line (and empty) files are rejected. This also guarantees that
    // the sample below is non-empty, so the ratio never divides by zero.
    if (text.split(/\r?\n/).length < 2) {
      return false;
    }

    const sampleLength = Math.min(ANALYSIS_SAMPLE_LENGTH, text.length);
    let printableCount = 0;
    for (let i = 0; i < sampleLength; i++) {
      if (isPrintable(text.charAt(i))) {
        printableCount++;
      }
    }

    return printableCount / sampleLength >= MIN_PRINTABLE_RATIO;
  } catch (err) {
    console.error('Error reading file:', err);
    return false;
  }
}