@coworker-agency/rag
Version:
Retrieval Augmented Generation (RAG) library for document indexing, vector storage, and AI-powered question answering
87 lines (75 loc) • 2.43 kB
JavaScript
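/**
 * Document Metadata Generator
 *
 * Helpers for building a metadata record (file type, MIME type, size and
 * timestamps) from a storage file object. The metadata is derived from the
 * file object's fields; document contents are not read.
 */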
/**
 * Build the metadata record for a file coming from Supabase Storage.
 *
 * The record combines values read from the file object (`name`, `size`,
 * `created_at`, `last_modified`) with values derived from them (extension,
 * MIME type, human-readable size). A missing or falsy `size` is treated as 0,
 * and missing timestamps fall back to the moment this function is called.
 *
 * @param {Object} file - File object from Supabase storage
 * @returns {Object} Document metadata
 * @throws {Error} If `file` is missing (falsy)
 */
export function generateDocumentMetadata(file) {
  if (!file) {
    throw new Error('File object is required');
  }

  // Values derived from the file name and size.
  const extension = getFileExtension(file.name);
  const sizeInBytes = file.size || 0;
  const humanReadableSize = formatFileSize(sizeInBytes);

  // A single "now" is shared by every timestamp default so they all agree.
  const now = new Date().toISOString();

  const metadata = {
    fileName: file.name,
    fileType: extension,
    mimeType: getMimeType(extension),
    fileSize: sizeInBytes,
    fileSizeFormatted: humanReadableSize,
    created: file.created_at || now,
    lastModified: file.last_modified || now,
    indexed: now,
    source: 'supabase_storage',
  };

  return metadata;
}
/**
 * Get the lowercase file extension (without the leading dot) from a filename.
 *
 * Only the last path segment is inspected, so dots in directory names are
 * ignored. Names with no extension yield an empty string; this includes
 * names without a dot ("README"), names ending in a dot ("file.") and
 * dot-files (".gitignore"), matching the convention of Node's `path.extname`.
 *
 * @param {string} filename - Name (or "/"-separated path) of the file
 * @returns {string} Lowercase extension, or '' if there is none or
 *   `filename` is not a string
 */
export function getFileExtension(filename) {
  if (typeof filename !== 'string') {
    return '';
  }

  // Drop any directory part: "docs.v2/notes" must not report "v2/notes".
  const baseName = filename.slice(filename.lastIndexOf('/') + 1);

  const dotIndex = baseName.lastIndexOf('.');
  // -1: no dot at all; 0: dot-file whose only dot is the leading one.
  if (dotIndex <= 0) {
    return '';
  }

  return baseName.slice(dotIndex + 1).toLowerCase();
}
/**
 * Known file extensions (lowercase, no leading dot) mapped to MIME types.
 *
 * A Map is used rather than an object literal so that names inherited from
 * Object.prototype ("constructor", "toString", "__proto__", ...) can never
 * be mistaken for a known extension.
 */
const MIME_TYPES_BY_EXTENSION = new Map([
  ['pdf', 'application/pdf'],
  ['txt', 'text/plain'],
  ['md', 'text/markdown'],
  ['json', 'application/json'],
  ['csv', 'text/csv'],
  ['doc', 'application/msword'],
  ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
  ['xls', 'application/vnd.ms-excel'],
  ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ['ppt', 'application/vnd.ms-powerpoint'],
  ['pptx', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'],
  ['html', 'text/html'],
  ['htm', 'text/html'],
]);

/** MIME type returned for extensions that are not in the table. */
const DEFAULT_MIME_TYPE = 'application/octet-stream';

/**
 * Get MIME type based on file extension.
 *
 * The lookup is case-insensitive. Unknown extensions and non-string input
 * resolve to 'application/octet-stream'.
 *
 * @param {string} extension - File extension without the leading dot
 * @returns {string} MIME type
 */
function getMimeType(extension) {
  if (typeof extension !== 'string') {
    return DEFAULT_MIME_TYPE;
  }

  return MIME_TYPES_BY_EXTENSION.get(extension.toLowerCase()) || DEFAULT_MIME_TYPE;
}
/**
 * Format a file size in human-readable form using 1024-based units.
 *
 * The value is shown with at most two decimals and trailing zeros removed
 * (e.g. 1536 -> "1.5 KB"). Sizes that are zero, negative or not a finite
 * number are reported as "0 Bytes". Sizes beyond the largest unit stay in
 * that unit instead of running off the end of the unit table.
 *
 * @param {number} bytes - File size in bytes
 * @returns {string} Formatted file size
 */
function formatFileSize(bytes) {
  // Coerce first so numeric strings keep working as they did before.
  const size = Number(bytes);
  if (!Number.isFinite(size) || size <= 0) {
    return '0 Bytes';
  }

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
  const lastUnit = sizes.length - 1;

  // Repeated division by a power of two is exact in floating point, which
  // avoids the rounding risk of a Math.log-based exponent at unit boundaries
  // and keeps the unit index inside the table for tiny and huge values.
  let value = size;
  let unitIndex = 0;
  while (value >= k && unitIndex < lastUnit) {
    value /= k;
    unitIndex += 1;
  }

  let rounded = parseFloat(value.toFixed(2));
  // Rounding can push e.g. 1023.999 KB up to 1024 KB; show that as 1 MB.
  if (rounded >= k && unitIndex < lastUnit) {
    rounded = parseFloat((rounded / k).toFixed(2));
    unitIndex += 1;
  }

  return `${rounded} ${sizes[unitIndex]}`;
}