UNPKG

@coworker-agency/rag

Version:

Retrieval Augmented Generation (RAG) library for document indexing, vector storage, and AI-powered question answering

87 lines (75 loc) 2.43 kB
/**
 * Document Metadata Generator
 *
 * Tools for extracting and generating metadata from documents
 */

/**
 * Extension -> MIME type table.
 *
 * A Map is used instead of a plain object so that an extension such as
 * "constructor" or "toString" can never resolve to an inherited
 * Object.prototype member.
 */
const MIME_TYPES = new Map([
  ['pdf', 'application/pdf'],
  ['txt', 'text/plain'],
  ['md', 'text/markdown'],
  ['json', 'application/json'],
  ['csv', 'text/csv'],
  ['doc', 'application/msword'],
  ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  ['xls', 'application/vnd.ms-excel'],
  ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ['ppt', 'application/vnd.ms-powerpoint'],
  ['pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
  ['html', 'text/html'],
  ['htm', 'text/html'],
]);

/** MIME type reported for extensions that are not in MIME_TYPES. */
const DEFAULT_MIME_TYPE = 'application/octet-stream';

/** Unit labels used by formatFileSize, smallest first; each step is 1024x. */
const SIZE_UNITS = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

/**
 * Generate metadata for a document from Supabase Storage
 *
 * Reads `name`, `size`, `created_at` and `last_modified` from the given
 * object. A missing size is reported as 0, and missing timestamps fall back
 * to the time of this call.
 * NOTE(review): confirm these property names match the file objects the
 * callers actually pass in.
 *
 * @param {Object} file - File object from Supabase storage
 * @returns {Object} Document metadata
 * @throws {Error} If `file` is missing (any falsy value)
 */
export function generateDocumentMetadata(file) {
  if (!file) {
    throw new Error('File object is required');
  }

  const fileExtension = getFileExtension(file.name);

  // Normalize once so the raw and the formatted size can never disagree.
  // `||` (not `??`) is deliberate: NaN and '' also collapse to 0.
  const fileSize = file.size || 0;

  // One timestamp shared by `indexed` and by any missing created/modified
  // value, so the defaults are mutually consistent.
  const timestamp = new Date().toISOString();

  return {
    fileName: file.name,
    fileType: fileExtension,
    mimeType: getMimeType(fileExtension),
    fileSize,
    fileSizeFormatted: formatFileSize(fileSize),
    created: file.created_at || timestamp,
    lastModified: file.last_modified || timestamp,
    indexed: timestamp,
    source: 'supabase_storage',
  };
}

/**
 * Get file extension from filename
 *
 * Only the last path segment (after the final "/") is inspected, so a dot in
 * a directory name is never mistaken for an extension.
 *
 * @param {string} filename - Name of the file
 * @returns {string} Lower-cased extension without the leading dot, or '' when
 *   the name has no dot or is not a string
 */
export function getFileExtension(filename) {
  if (typeof filename !== 'string') {
    return '';
  }

  const baseName = filename.slice(filename.lastIndexOf('/') + 1);
  const dotIndex = baseName.lastIndexOf('.');

  // No dot means no extension; returning the whole name would be wrong.
  if (dotIndex === -1) {
    return '';
  }

  return baseName.slice(dotIndex + 1).toLowerCase();
}

/**
 * Get MIME type based on file extension
 *
 * @param {string} extension - File extension (lower-case, no leading dot)
 * @returns {string} MIME type, or 'application/octet-stream' when the
 *   extension is not recognized
 */
function getMimeType(extension) {
  return MIME_TYPES.get(extension) ?? DEFAULT_MIME_TYPE;
}

/**
 * Format file size in human-readable format
 *
 * Uses 1024-based units and at most two decimal places. Sizes beyond the
 * largest unit are expressed in that unit (e.g. '1024 TB').
 *
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size; '0 Bytes' for zero, negative or
 *   non-finite input
 */
function formatFileSize(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) {
    return '0 Bytes';
  }

  const lastUnitIndex = SIZE_UNITS.length - 1;
  let value = bytes;
  let unitIndex = 0;

  // Repeated division keeps the unit index inside SIZE_UNITS and avoids the
  // rounding risk of a Math.log-based index.
  while (value >= 1024 && unitIndex < lastUnitIndex) {
    value /= 1024;
    unitIndex += 1;
  }

  // toFixed + parseFloat rounds to two decimals and drops trailing zeros.
  return `${Number.parseFloat(value.toFixed(2))} ${SIZE_UNITS[unitIndex]}`;
}