UNPKG

@aj-archipelago/cortex

Version:

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.

445 lines (388 loc) 15.8 kB
import { S3Client, PutObjectCommand, GetObjectCommand, ListObjectVersionsCommand, GetObjectAttributesCommand } from '@aws-sdk/client-s3';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
import { Readable } from 'stream';

// Create s3Client lazily to allow for proper mocking in tests
let s3Client = null;

/**
 * Returns the shared S3 client, constructing it on first use.
 * The region comes from AWS_REGION and falls back to 'us-east-1'.
 * @returns {S3Client} The cached S3 client instance
 */
const getS3Client = () => {
  if (!s3Client) {
    s3Client = new S3Client({
      region: process.env.AWS_REGION || 'us-east-1'
    });
  }
  return s3Client;
};

const BUCKET_NAME = process.env.S3_BUCKET_NAME || (process.env.NODE_ENV === 'test' ? 'test-bucket' : null);
const MASTER_MANIFEST_KEY = 'master-manifest.json';

// Default 1 hour
const DEFAULT_SIGNED_URL_EXPIRY = 3600;

/**
 * Reads SIGNED_URL_EXPIRY_SECONDS from the environment.
 * Anything that does not parse to a positive base-10 integer (unset, empty,
 * non-numeric, zero or negative) falls back to the default.
 * @returns {number} Expiry in seconds
 */
function resolveSignedUrlExpiry() {
  const parsed = Number.parseInt(process.env.SIGNED_URL_EXPIRY_SECONDS, 10);
  return parsed > 0 ? parsed : DEFAULT_SIGNED_URL_EXPIRY;
}

const SIGNED_URL_EXPIRY = resolveSignedUrlExpiry();

// Keys every master manifest entry must carry (non-null) to be considered valid
const REQUIRED_MANIFEST_KEYS = ['id', 'mogrtFile', 'name', 'previewFile', 'uploadDate'];

if (!BUCKET_NAME) {
  throw new Error('S3_BUCKET_NAME environment variable is required');
}

/**
 * Generates a signed URL for an S3 object
 * @param {string} key - The S3 object key
 * @returns {Promise<string>} - The signed URL (valid for SIGNED_URL_EXPIRY seconds)
 */
async function generateSignedUrl(key) {
  const command = new GetObjectCommand({
    Bucket: BUCKET_NAME,
    Key: key
  });
  return await getSignedUrl(getS3Client(), command, { expiresIn: SIGNED_URL_EXPIRY });
}

/**
 * Adds signed URLs to a manifest object.
 * Returns a shallow copy; `mogrtUrl` / `previewUrl` are only added when the
 * corresponding `mogrtFile` / `previewFile` value is truthy.
 * @param {Object} manifest - The manifest object
 * @returns {Promise<Object>} - Manifest with signed URLs
 */
async function addSignedUrlsToManifest(manifest) {
  const result = { ...manifest };
  if (manifest?.mogrtFile) {
    result.mogrtUrl = await generateSignedUrl(manifest.mogrtFile);
  }
  if (manifest?.previewFile) {
    result.previewUrl = await generateSignedUrl(manifest.previewFile);
  }
  return result;
}

/**
 * Lists the required keys that are absent, undefined or null on an entry.
 * A null/undefined entry is reported as missing every required key instead of
 * throwing, so one bad element cannot make the whole manifest unreadable.
 * @param {*} entry - A parsed element of the master manifest array
 * @returns {string[]} Names of the missing keys (empty when the entry is valid)
 */
function findMissingManifestKeys(entry) {
  if (entry === null || entry === undefined) {
    return [...REQUIRED_MANIFEST_KEYS];
  }
  return REQUIRED_MANIFEST_KEYS.filter(key =>
    !Object.prototype.hasOwnProperty.call(entry, key) ||
    entry[key] === undefined ||
    entry[key] === null
  );
}

/**
 * Gets the master manifest file containing all MOGRT entries.
 * Entries missing any required key are logged and dropped. If the manifest
 * object does not exist yet (NoSuchKey), an empty manifest is written to S3
 * and returned.
 * @returns {Promise<Array>} Array of all valid MOGRT entries, each with signed URLs
 * @throws Any other S3 or JSON parsing error is logged and rethrown
 */
async function getMasterManifest() {
  try {
    console.log('🔍 Attempting to get master manifest from S3...');
    const response = await getS3Client().send(
      new GetObjectCommand({
        Bucket: BUCKET_NAME,
        Key: MASTER_MANIFEST_KEY
      })
    );

    console.log('📥 Retrieved master manifest response');
    const manifestData = await response.Body.transformToString();
    console.log('📄 Raw master manifest data:', manifestData);

    let manifest = JSON.parse(manifestData);
    if (!Array.isArray(manifest)) {
      console.warn('Master manifest is not an array. Resetting to empty manifest.');
      manifest = [];
    }

    manifest = manifest.filter(entry => {
      const missingKeys = findMissingManifestKeys(entry);
      if (missingKeys.length > 0) {
        console.warn('Manifest entry missing keys:', missingKeys, 'Entry:', JSON.stringify(entry, null, 2));
        return false;
      }
      return true;
    });
    console.log('🔄 Parsed master manifest:', JSON.stringify(manifest, null, 2));

    // Add signed URLs to each entry
    console.log('🔑 Adding signed URLs to manifest entries...');
    const manifestWithUrls = await Promise.all(manifest.map(entry => addSignedUrlsToManifest(entry)));
    console.log('✅ Successfully added signed URLs to all entries');

    return manifestWithUrls;
  } catch (error) {
    if (error.name === 'NoSuchKey') {
      console.log('⚠️ Master manifest not found, creating empty manifest');
      const emptyManifest = [];
      await updateMasterManifest(emptyManifest);
      return emptyManifest;
    }
    console.error('❌ Error getting master manifest:', error);
    throw error;
  }
}

/**
 * Updates the master manifest file.
 * Only id, mogrtFile, previewFile, name and uploadDate are persisted; any
 * other properties (such as signed URLs) are stripped before saving.
 * @param {Array} manifest - The complete manifest array
 * @returns {Promise<void>}
 * @throws Errors from the S3 upload are logged and rethrown
 */
async function updateMasterManifest(manifest) {
  console.log('📝 Preparing to update master manifest...');
  console.log('📊 Current manifest entries:', manifest.length);

  // Remove signed URLs before saving
  const manifestToSave = manifest.map(({ id, mogrtFile, previewFile, name, uploadDate }) => {
    console.log(`🔖 Processing entry ${id}:`, { mogrtFile, previewFile, uploadDate });
    return { id, mogrtFile, previewFile, name, uploadDate };
  });

  const uploadParams = {
    Bucket: BUCKET_NAME,
    Key: MASTER_MANIFEST_KEY,
    Body: Buffer.from(JSON.stringify(manifestToSave, null, 2)),
    ContentType: 'application/json'
  };

  console.log('💾 Saving master manifest to S3:', JSON.stringify(manifestToSave, null, 2));
  try {
    await getS3Client().send(new PutObjectCommand(uploadParams));
    console.log('✅ Master manifest successfully updated');
  } catch (error) {
    console.error('❌ Error updating master manifest:', error);
    throw error;
  }
}

/**
 * Adds a new entry to the master manifest, or replaces the existing entry
 * that has the same id.
 * NOTE(review): this is an unsynchronised read-modify-write of one S3 object;
 * concurrent callers can overwrite each other's changes.
 * @param {Object} entry - The new manifest entry to add
 * @returns {Promise<void>}
 * @throws Errors from reading or writing the master manifest are logged and rethrown
 */
async function addToMasterManifest(entry) {
  try {
    console.log('📥 Getting current master manifest...');
    const masterManifest = await getMasterManifest();
    console.log('📊 Current master manifest entries:', masterManifest.length);
    console.log('📄 Current master manifest content:', JSON.stringify(masterManifest, null, 2));

    // Check if entry already exists
    const existingIndex = masterManifest.findIndex(item => item.id === entry.id);
    if (existingIndex !== -1) {
      console.log(`🔄 Updating existing entry at index ${existingIndex}:`, entry.id);
      console.log('📝 Old entry:', JSON.stringify(masterManifest[existingIndex], null, 2));
      console.log('📝 New entry:', JSON.stringify(entry, null, 2));
      masterManifest[existingIndex] = entry;
    } else {
      console.log('➕ Adding new entry:', JSON.stringify(entry, null, 2));
      masterManifest.push(entry);
    }

    console.log('💾 Saving updated master manifest...');
    await updateMasterManifest(masterManifest);
    console.log('✅ Master manifest successfully updated');
    console.log('📊 New total entries:', masterManifest.length);
  } catch (error) {
    console.error('❌ Error in addToMasterManifest:', error);
    throw error;
  }
}

/**
 * Removes an entry from the master manifest by ID.
 * The manifest is only rewritten when a matching entry was found.
 * @param {string} id - The ID of the entry to remove
 * @returns {Promise<boolean>} - True if item was found and removed, false if not found
 * @throws Errors from reading or writing the master manifest are logged and rethrown
 */
export async function removeFromMasterManifest(id) {
  try {
    console.log(`🔍 Looking for MOGRT with ID ${id} to remove`);
    const masterManifest = await getMasterManifest();
    console.log('📊 Current master manifest entries:', masterManifest.length);

    // Check if entry exists
    const initialLength = masterManifest.length;
    const filteredManifest = masterManifest.filter(item => item.id !== id);

    if (filteredManifest.length === initialLength) {
      console.log(`❓ MOGRT with ID ${id} not found in manifest`);
      return false;
    }

    console.log(`🗑️ Removing entry with ID: ${id}`);
    console.log('💾 Saving updated master manifest...');
    await updateMasterManifest(filteredManifest);
    console.log('✅ Master manifest successfully updated');
    console.log('📊 New total entries:', filteredManifest.length);
    return true;
  } catch (error) {
    console.error(`❌ Error removing MOGRT with ID ${id}:`, error);
    throw error;
  }
}

/**
 * Chooses the value to send as the PutObject body.
 * Strings, byte arrays (Buffer is a Uint8Array) and existing Readable streams
 * are passed through untouched; only other iterables are wrapped in a stream.
 * Wrapping a plain Uint8Array with Readable.from would iterate it byte by
 * byte, and wrapping a string hides its length from the SDK.
 * @param {string|Uint8Array|Readable|Iterable|AsyncIterable} fileData - Data to upload
 * @returns {string|Uint8Array|Readable} A body value for PutObjectCommand
 */
function toS3Body(fileData) {
  if (
    typeof fileData === 'string' ||
    fileData instanceof Uint8Array ||
    fileData instanceof Readable
  ) {
    return fileData;
  }
  return Readable.from(fileData);
}

/**
 * Uploads a file to S3
 * @param {string} key - key (location) of the file in S3
 * @param {Buffer|Readable|string|Uint8Array} fileData - File data to upload
 * @param {string} [contentType] - MIME type; when omitted, 'application/json'
 *   is used for keys ending in '.json' and 'application/octet-stream' otherwise
 * @returns {Promise<{key: string, location: string}>}
 */
export async function uploadToS3(key, fileData, contentType) {
  const uploadParams = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: toS3Body(fileData),
    ContentType: contentType || (key.endsWith('.json') ? 'application/json' : 'application/octet-stream')
  };

  await getS3Client().send(new PutObjectCommand(uploadParams));
  return {
    key,
    location: `s3://${BUCKET_NAME}/${key}`
  };
}

/**
 * Gets the manifest file for a specific upload.
 * The special uploadId 'master' returns the master manifest array instead.
 * @param {string} uploadId - UUID of the upload
 * @returns {Promise<Object>} - Manifest JSON with signed URLs
 * @throws {Error} 'Manifest not found' when the manifest object does not exist
 */
export async function getManifest(uploadId) {
  if (uploadId === 'master') {
    return getMasterManifest();
  }

  const key = `${uploadId}/manifest.json`;
  try {
    const response = await getS3Client().send(
      new GetObjectCommand({
        Bucket: BUCKET_NAME,
        Key: key
      })
    );

    const manifestData = await response.Body.transformToString();
    const manifest = JSON.parse(manifestData);

    // Add signed URLs to the manifest
    return await addSignedUrlsToManifest(manifest);
  } catch (error) {
    if (error.name === 'NoSuchKey') {
      throw new Error('Manifest not found');
    }
    throw error;
  }
}

/**
 * Creates or updates an individual manifest and adds it to the master manifest.
 * Only id, mogrtFile, name, previewFile and uploadDate are persisted.
 * @param {Object} manifest - The individual manifest to save
 * @returns {Promise<void>}
 * @throws Errors from either upload are logged and rethrown
 */
export async function saveManifest(manifest) {
  try {
    console.log('Saving manifest:', manifest);

    // Remove signed URLs if they exist
    const { id, mogrtFile, name, previewFile, uploadDate } = manifest;
    const manifestToSave = { id, mogrtFile, name, previewFile, uploadDate };

    // Save individual manifest
    await uploadToS3(
      `${manifest.id}/manifest.json`, // Key for individual manifest
      Buffer.from(JSON.stringify(manifestToSave, null, 2)),
      'application/json'
    );
    console.log('Individual manifest saved');

    // Add to master manifest
    await addToMasterManifest(manifestToSave);
    console.log('Added to master manifest');
  } catch (error) {
    console.error('Error saving manifest:', error);
    throw error;
  }
}

/**
 * Saves a glossary ID to S3 with versioning.
 * The object body is the glossary ID itself; the ID, language pair and
 * optional name are also stored as object metadata.
 * @param {string} glossaryId - The glossary ID to save
 * @param {string} langPair - The language pair for the glossary (e.g., 'en-es')
 * @param {string} name - Optional name for the glossary
 * @returns {Promise<{key: string, location: string, versionId: string}>}
 *   versionId is whatever S3 returns — presumably undefined unless bucket
 *   versioning is enabled (TODO confirm)
 * @throws {Error} When glossaryId or langPair is missing; S3 errors are logged and rethrown
 */
export async function saveGlossaryId(glossaryId, langPair, name = '') {
  if (!glossaryId) {
    throw new Error('Glossary ID is required');
  }

  if (!langPair) {
    throw new Error('Language pair is required');
  }

  const key = `glossaries/${langPair}/${glossaryId}.txt`;
  const metadata = {
    'glossary-id': glossaryId,
    'language-pair': langPair
  };

  if (name) {
    metadata['glossary-name'] = name;
  }

  const uploadParams = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: glossaryId,
    ContentType: 'text/plain',
    Metadata: metadata
  };

  try {
    console.log(`💾 Saving glossary ID ${glossaryId} to S3: ${key}`);
    const response = await getS3Client().send(new PutObjectCommand(uploadParams));
    console.log(`✅ Glossary ID saved with version: ${response.VersionId}`);

    return {
      key,
      location: `s3://${BUCKET_NAME}/${key}`,
      versionId: response.VersionId
    };
  } catch (error) {
    console.error(`❌ Error saving glossary ID to S3:`, error);
    throw error;
  }
}

/**
 * Gets all versions of a glossary.
 * A version whose content cannot be fetched is still returned, with an
 * `error` field in place of `glossaryId` / `metadata`.
 * NOTE(review): only the first page of ListObjectVersions results is read.
 * @param {string} glossaryId - The ID of the glossary to get versions for
 * @param {string} langPair - The language pair for the glossary
 * @param {string} name - Optional name of the glossary (currently unused)
 * @returns {Promise<Array>} - Array of glossary versions with metadata
 * @throws Errors from listing versions are logged and rethrown
 */
export async function getGlossaryVersions(glossaryId, langPair, name = '') {
  const keyPrefix = `glossaries/${langPair}/`;
  const key = `${keyPrefix}${glossaryId}.txt`;

  try {
    console.log(`🔍 Getting versions for glossary ${glossaryId} from S3...`);
    const response = await getS3Client().send(new ListObjectVersionsCommand({
      Bucket: BUCKET_NAME,
      Prefix: key
    }));

    console.log(`📥 Retrieved ${response.Versions?.length || 0} versions`);

    // Prefix is a starts-with filter, so the listing can include other keys
    // that merely begin with this key; keep only versions of the exact object.
    const matchingVersions = (response.Versions ?? []).filter(version => version.Key === key);
    if (matchingVersions.length === 0) {
      return [];
    }

    // Map versions to a more user-friendly format
    const versions = await Promise.all(matchingVersions.map(async (version) => {
      try {
        // Get the object to retrieve the actual glossary ID content
        const objectResponse = await getS3Client().send(new GetObjectCommand({
          Bucket: BUCKET_NAME,
          Key: version.Key,
          VersionId: version.VersionId
        }));

        const glossaryId = await objectResponse.Body.transformToString();

        return {
          versionId: version.VersionId,
          glossaryId: glossaryId,
          lastModified: version.LastModified,
          isLatest: version.IsLatest,
          metadata: objectResponse.Metadata || {}
        };
      } catch (error) {
        // Best effort: report the failure on this version without failing the whole list
        console.error(`❌ Error retrieving version ${version.VersionId}:`, error);
        return {
          versionId: version.VersionId,
          lastModified: version.LastModified,
          isLatest: version.IsLatest,
          error: 'Failed to retrieve version details'
        };
      }
    }));

    return versions;
  } catch (error) {
    console.error(`❌ Error getting glossary versions:`, error);
    throw error;
  }
}

/**
 * Gets a specific version of a glossary
 * @param {string} glossaryId - The glossary ID
 * @param {string} langPair - The language pair
 * @param {string} versionId - The specific version ID to retrieve
 * @param {string} name - Optional name of the glossary (currently unused)
 * @returns {Promise<Object>} - The glossary version details:
 *   { versionId, glossaryId (the stored object content), lastModified, metadata }
 * @throws Errors from S3 are logged and rethrown
 */
export async function getGlossaryVersion(glossaryId, langPair, versionId, name = '') {
  const key = `glossaries/${langPair}/${glossaryId}.txt`;

  try {
    console.log(`🔍 Getting version ${versionId} for glossary ${glossaryId}...`);
    const response = await getS3Client().send(new GetObjectCommand({
      Bucket: BUCKET_NAME,
      Key: key,
      VersionId: versionId
    }));

    const content = await response.Body.transformToString();

    return {
      versionId: versionId,
      glossaryId: content,
      lastModified: response.LastModified,
      metadata: response.Metadata || {}
    };
  } catch (error) {
    console.error(`❌ Error getting glossary version ${versionId}:`, error);
    throw error;
  }
}

// Export for testing
export { getS3Client };

/**
 * Reset function for testing: drops the cached client so the next
 * getS3Client() call constructs a fresh one.
 */
export function resetS3Client() {
  s3Client = null;
}