@aj-archipelago/cortex
Version:
Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.
445 lines (388 loc) • 15.8 kB
JavaScript
import { S3Client, PutObjectCommand, GetObjectCommand, ListObjectVersionsCommand, GetObjectAttributesCommand } from '@aws-sdk/client-s3';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
import { Readable } from 'stream';
// The client is built on first use instead of at import time so that tests
// can install their mocks before any client object exists.
let s3Client = null;

/**
 * Returns the module-wide S3 client, constructing it on the first call.
 * The region comes from AWS_REGION and falls back to 'us-east-1'.
 * @returns {S3Client} - The cached S3 client instance
 */
const getS3Client = () => {
  if (s3Client) {
    return s3Client;
  }
  const region = process.env.AWS_REGION || 'us-east-1';
  s3Client = new S3Client({ region });
  return s3Client;
};
// Bucket used by every S3 operation in this module. When NODE_ENV is 'test'
// a placeholder name is used so the module can load without S3_BUCKET_NAME.
const BUCKET_NAME = process.env.S3_BUCKET_NAME || (process.env.NODE_ENV === 'test' ? 'test-bucket' : null);
// Object key of the manifest that lists every MOGRT entry.
const MASTER_MANIFEST_KEY = 'master-manifest.json';
// Lifetime of generated signed URLs, in seconds. The environment value is
// parsed as base 10; anything unparsable, zero or negative falls back to the
// default so an invalid expiry is never handed to the signer.
const DEFAULT_SIGNED_URL_EXPIRY = 3600; // Default 1 hour
const configuredSignedUrlExpiry = Number.parseInt(process.env.SIGNED_URL_EXPIRY_SECONDS, 10);
const SIGNED_URL_EXPIRY = configuredSignedUrlExpiry > 0 ? configuredSignedUrlExpiry : DEFAULT_SIGNED_URL_EXPIRY;
// Fail at import time rather than on the first S3 call when no bucket is configured.
if (!BUCKET_NAME) {
  throw new Error('S3_BUCKET_NAME environment variable is required');
}
/**
 * Produces a presigned GET URL for a single object in the configured bucket.
 * The URL is valid for SIGNED_URL_EXPIRY seconds.
 * @param {string} key - Key of the S3 object to sign
 * @returns {Promise<string>} - The presigned URL
 */
async function generateSignedUrl(key) {
  const getObject = new GetObjectCommand({ Bucket: BUCKET_NAME, Key: key });
  const signedUrl = await getSignedUrl(getS3Client(), getObject, {
    expiresIn: SIGNED_URL_EXPIRY
  });
  return signedUrl;
}
/**
 * Returns a shallow copy of a manifest with signed URLs added.
 * `mogrtUrl` is set when the manifest has a truthy `mogrtFile`, and
 * `previewUrl` when it has a truthy `previewFile`. The input object is not
 * modified; a null or undefined manifest yields an empty object.
 * @param {Object} manifest - The manifest object
 * @returns {Promise<Object>} - Manifest copy with signed URLs
 */
async function addSignedUrlsToManifest(manifest) {
  const result = { ...manifest };
  const mogrtFile = manifest?.mogrtFile;
  const previewFile = manifest?.previewFile;

  // The two signatures do not depend on each other, so request them together
  // instead of waiting for the first before starting the second.
  const [mogrtUrl, previewUrl] = await Promise.all([
    mogrtFile ? generateSignedUrl(mogrtFile) : undefined,
    previewFile ? generateSignedUrl(previewFile) : undefined
  ]);

  if (mogrtFile) {
    result.mogrtUrl = mogrtUrl;
  }
  if (previewFile) {
    result.previewUrl = previewUrl;
  }
  return result;
}
/**
 * Gets the master manifest file containing all MOGRT entries.
 *
 * The stored JSON is read from MASTER_MANIFEST_KEY. A payload that is not an
 * array is treated as an empty manifest (nothing is written back). Entries
 * that are not objects, or that lack any of the required keys, are dropped
 * with a warning. Each surviving entry is returned with signed URLs added.
 * If the manifest object does not exist (`NoSuchKey`), an empty manifest is
 * written to S3 and an empty array is returned.
 *
 * @returns {Promise<Array>} Array of all valid MOGRT entries with signed URLs
 * @throws Rethrows any S3 or JSON parsing error other than `NoSuchKey`
 */
async function getMasterManifest() {
  try {
    console.log('🔍 Attempting to get master manifest from S3...');
    const response = await getS3Client().send(
      new GetObjectCommand({
        Bucket: BUCKET_NAME,
        Key: MASTER_MANIFEST_KEY
      })
    );
    console.log('📥 Retrieved master manifest response');
    const manifestData = await response.Body.transformToString();
    console.log('📄 Raw master manifest data:', manifestData);
    let manifest = JSON.parse(manifestData);
    if (!Array.isArray(manifest)) {
      console.warn('Master manifest is not an array. Resetting to empty manifest.');
      manifest = [];
    }
    const requiredKeys = ['id', 'mogrtFile', 'name', 'previewFile', 'uploadDate'];
    const validManifest = manifest.filter(entry => {
      // A null (or otherwise non-object) entry cannot carry the required keys.
      // Reject it here: hasOwnProperty.call(null, ...) would throw and make
      // the whole manifest unreadable because of one bad element.
      const isObjectEntry = entry !== null && typeof entry === 'object';
      const missingKeys = isObjectEntry
        ? requiredKeys.filter(key => !(Object.prototype.hasOwnProperty.call(entry, key) && entry[key] !== undefined && entry[key] !== null))
        : requiredKeys;
      if (missingKeys.length > 0) {
        console.warn('Manifest entry missing keys:', missingKeys, 'Entry:', JSON.stringify(entry, null, 2));
        return false;
      }
      return true;
    });
    manifest = validManifest;
    console.log('🔄 Parsed master manifest:', JSON.stringify(manifest, null, 2));
    // Add signed URLs to each entry
    console.log('🔑 Adding signed URLs to manifest entries...');
    const manifestWithUrls = await Promise.all(manifest.map(entry => addSignedUrlsToManifest(entry)));
    console.log('✅ Successfully added signed URLs to all entries');
    return manifestWithUrls;
  } catch (error) {
    if (error.name === 'NoSuchKey') {
      // First use of the bucket: create the manifest so later reads succeed.
      console.log('⚠️ Master manifest not found, creating empty manifest');
      const emptyManifest = [];
      await updateMasterManifest(emptyManifest);
      return emptyManifest;
    }
    console.error('❌ Error getting master manifest:', error);
    throw error;
  }
}
/**
 * Overwrites the master manifest object in S3 with the given entries.
 * Only `id`, `mogrtFile`, `previewFile`, `name` and `uploadDate` of each entry
 * are persisted, so signed URLs and any other extra fields are not stored.
 * @param {Array} manifest - The complete manifest array
 * @returns {Promise<void>}
 * @throws Rethrows the S3 error when the upload fails
 */
async function updateMasterManifest(manifest) {
  console.log('📝 Preparing to update master manifest...');
  console.log('📊 Current manifest entries:', manifest.length);

  // Reduce every entry to the persisted field set.
  const persistedEntries = manifest.map((entry) => {
    const { id, mogrtFile, previewFile, name, uploadDate } = entry;
    console.log(`🔖 Processing entry ${id}:`, { mogrtFile, previewFile, uploadDate });
    return { id, mogrtFile, previewFile, name, uploadDate };
  });

  const serialized = JSON.stringify(persistedEntries, null, 2);
  const putParams = {
    Bucket: BUCKET_NAME,
    Key: MASTER_MANIFEST_KEY,
    Body: Buffer.from(serialized),
    ContentType: 'application/json'
  };
  console.log('💾 Saving master manifest to S3:', serialized);

  try {
    const client = getS3Client();
    await client.send(new PutObjectCommand(putParams));
    console.log('✅ Master manifest successfully updated');
  } catch (uploadError) {
    console.error('❌ Error updating master manifest:', uploadError);
    throw uploadError;
  }
}
/**
 * Inserts an entry into the master manifest, or replaces the existing entry
 * that has the same `id`, and then saves the manifest back to S3.
 * @param {Object} entry - The new manifest entry to add
 * @returns {Promise<void>}
 * @throws Rethrows any error from reading or saving the master manifest
 */
async function addToMasterManifest(entry) {
  try {
    console.log('📥 Getting current master manifest...');
    const entries = await getMasterManifest();
    console.log('📊 Current master manifest entries:', entries.length);
    console.log('📄 Current master manifest content:', JSON.stringify(entries, null, 2));

    // Look for an entry with the same id; -1 means this is a new entry.
    const position = entries.findIndex((candidate) => candidate.id === entry.id);
    if (position === -1) {
      console.log('➕ Adding new entry:', JSON.stringify(entry, null, 2));
      entries.push(entry);
    } else {
      console.log(`🔄 Updating existing entry at index ${position}:`, entry.id);
      console.log('📝 Old entry:', JSON.stringify(entries[position], null, 2));
      console.log('📝 New entry:', JSON.stringify(entry, null, 2));
      entries[position] = entry;
    }

    console.log('💾 Saving updated master manifest...');
    await updateMasterManifest(entries);
    console.log('✅ Master manifest successfully updated');
    console.log('📊 New total entries:', entries.length);
  } catch (failure) {
    console.error('❌ Error in addToMasterManifest:', failure);
    throw failure;
  }
}
/**
 * Deletes the entry with the given ID from the master manifest.
 * The manifest is only written back to S3 when an entry was actually removed.
 * @param {string} id - The ID of the entry to remove
 * @returns {Promise<boolean>} - True if item was found and removed, false if not found
 * @throws Rethrows any error from reading or saving the master manifest
 */
export async function removeFromMasterManifest(id) {
  try {
    console.log(`🔍 Looking for MOGRT with ID ${id} to remove`);
    const entries = await getMasterManifest();
    console.log('📊 Current master manifest entries:', entries.length);

    // An unchanged length after filtering means no entry carried this ID.
    const remaining = entries.filter((candidate) => candidate.id !== id);
    const wasFound = remaining.length !== entries.length;
    if (!wasFound) {
      console.log(`❓ MOGRT with ID ${id} not found in manifest`);
      return false;
    }

    console.log(`🗑️ Removing entry with ID: ${id}`);
    console.log('💾 Saving updated master manifest...');
    await updateMasterManifest(remaining);
    console.log('✅ Master manifest successfully updated');
    console.log('📊 New total entries:', remaining.length);
    return true;
  } catch (failure) {
    console.error(`❌ Error removing MOGRT with ID ${id}:`, failure);
    throw failure;
  }
}
/**
 * Uploads a file to S3.
 *
 * Strings, Buffers / Uint8Arrays and existing Readable streams are handed to
 * S3 unchanged. Any other value is treated as an iterable of chunks and is
 * wrapped with `Readable.from`.
 *
 * @param {string} key - key (location) of the file in S3
 * @param {Buffer|Uint8Array|string|Readable|Iterable|AsyncIterable} fileData - File data to upload
 * @param {string} [contentType] - MIME type to store with the object. When
 *   omitted, keys ending in '.json' get 'application/json' and everything
 *   else gets 'application/octet-stream'.
 * @returns {Promise<{key: string, location: string}>}
 */
export async function uploadToS3(key, fileData, contentType) {
  // Buffer is a Uint8Array subclass, so one check covers both. Wrapping a
  // plain Uint8Array with Readable.from would iterate it element by element
  // and emit numbers instead of binary chunks; wrapping an existing stream
  // only adds a redundant layer.
  const isUploadableAsIs =
    typeof fileData === 'string' ||
    fileData instanceof Uint8Array ||
    fileData instanceof Readable;
  const uploadParams = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: isUploadableAsIs ? fileData : Readable.from(fileData),
    ContentType: contentType || (key.endsWith('.json') ? 'application/json' : 'application/octet-stream')
  };
  await getS3Client().send(new PutObjectCommand(uploadParams));
  return {
    key,
    location: `s3://${BUCKET_NAME}/${key}`
  };
}
/**
 * Loads a manifest and decorates it with signed URLs.
 * The special ID 'master' returns the master manifest; any other ID reads
 * `<uploadId>/manifest.json` from the bucket.
 * @param {string} uploadId - UUID of the upload, or 'master'
 * @returns {Promise<Object>} - Manifest JSON with signed URLs
 * @throws {Error} 'Manifest not found' when the manifest object does not exist;
 *   every other error is rethrown unchanged
 */
export async function getManifest(uploadId) {
  const wantsMaster = uploadId === 'master';
  if (wantsMaster) {
    return getMasterManifest();
  }

  const manifestKey = `${uploadId}/manifest.json`;
  try {
    const fetchManifest = new GetObjectCommand({ Bucket: BUCKET_NAME, Key: manifestKey });
    const s3Response = await getS3Client().send(fetchManifest);
    const rawJson = await s3Response.Body.transformToString();
    const parsedManifest = JSON.parse(rawJson);
    // Awaited inside the try so signing failures go through the same catch.
    return await addSignedUrlsToManifest(parsedManifest);
  } catch (failure) {
    if (failure.name !== 'NoSuchKey') {
      throw failure;
    }
    throw new Error('Manifest not found');
  }
}
/**
 * Stores an individual manifest at `<id>/manifest.json` and then registers
 * the same data in the master manifest.
 * Only `id`, `mogrtFile`, `name`, `previewFile` and `uploadDate` are kept, so
 * signed URLs present on the input are not persisted.
 * @param {Object} manifest - The individual manifest to save
 * @returns {Promise<void>}
 * @throws Rethrows any error from the upload or the master manifest update
 */
export async function saveManifest(manifest) {
  try {
    console.log('Saving manifest:', manifest);

    // Copy only the persisted fields.
    const { id, mogrtFile, name, previewFile, uploadDate } = manifest;
    const persisted = { id, mogrtFile, name, previewFile, uploadDate };

    // Write the per-upload manifest first.
    const individualKey = `${manifest.id}/manifest.json`;
    const payload = Buffer.from(JSON.stringify(persisted, null, 2));
    await uploadToS3(individualKey, payload, 'application/json');
    console.log('Individual manifest saved');

    // Then record it in the master manifest.
    await addToMasterManifest(persisted);
    console.log('Added to master manifest');
  } catch (failure) {
    console.error('Error saving manifest:', failure);
    throw failure;
  }
}
/**
 * Stores a glossary ID as a small text object at
 * `glossaries/<langPair>/<glossaryId>.txt`. The object body is the glossary
 * ID itself; the ID, the language pair and (when given) the name are also
 * attached as object metadata. The version ID reported by S3 is returned.
 * @param {string} glossaryId - The glossary ID to save
 * @param {string} langPair - The language pair for the glossary (e.g., 'en-es')
 * @param {string} name - Optional name for the glossary
 * @returns {Promise<{key: string, location: string, versionId: string}>}
 * @throws {Error} When `glossaryId` or `langPair` is missing, or when the upload fails
 */
export async function saveGlossaryId(glossaryId, langPair, name = '') {
  if (!glossaryId) {
    throw new Error('Glossary ID is required');
  }
  if (!langPair) {
    throw new Error('Language pair is required');
  }

  const key = `glossaries/${langPair}/${glossaryId}.txt`;
  const putParams = {
    Bucket: BUCKET_NAME,
    Key: key,
    Body: glossaryId,
    ContentType: 'text/plain',
    Metadata: {
      'glossary-id': glossaryId,
      'language-pair': langPair,
      // The name is only recorded when one was supplied.
      ...(name ? { 'glossary-name': name } : {})
    }
  };

  try {
    console.log(`💾 Saving glossary ID ${glossaryId} to S3: ${key}`);
    const client = getS3Client();
    const putResult = await client.send(new PutObjectCommand(putParams));
    console.log(`✅ Glossary ID saved with version: ${putResult.VersionId}`);
    const location = `s3://${BUCKET_NAME}/${key}`;
    return { key, location, versionId: putResult.VersionId };
  } catch (failure) {
    console.error('❌ Error saving glossary ID to S3:', failure);
    throw failure;
  }
}
/**
 * Gets all versions of a glossary.
 *
 * Lists the object versions stored under
 * `glossaries/<langPair>/<glossaryId>.txt` and fetches each version to read
 * its body (the glossary ID that was saved) and its metadata. A version that
 * cannot be fetched is still reported, with an `error` field instead of
 * `glossaryId` and `metadata`.
 *
 * @param {string} glossaryId - The ID of the glossary to get versions for
 * @param {string} langPair - The language pair for the glossary
 * @param {string} name - Optional name of the glossary (currently unused)
 * @returns {Promise<Array>} - Array of glossary versions with metadata
 * @throws Rethrows the S3 error when listing the versions fails
 */
export async function getGlossaryVersions(glossaryId, langPair, name = '') {
  const key = `glossaries/${langPair}/${glossaryId}.txt`;
  try {
    console.log(`🔍 Getting versions for glossary ${glossaryId} from S3...`);
    const matchingVersions = [];
    let keyMarker;
    let versionIdMarker;
    // A single listing call may be truncated, so keep requesting pages while
    // S3 hands back continuation markers.
    do {
      const listParams = { Bucket: BUCKET_NAME, Prefix: key };
      if (keyMarker !== undefined) {
        listParams.KeyMarker = keyMarker;
        if (versionIdMarker !== undefined) {
          listParams.VersionIdMarker = versionIdMarker;
        }
      }
      const page = await getS3Client().send(new ListObjectVersionsCommand(listParams));
      for (const version of page.Versions || []) {
        // Prefix is a prefix match: skip other objects whose key merely
        // starts with this glossary's key.
        if (version.Key === key) {
          matchingVersions.push(version);
        }
      }
      keyMarker = page.IsTruncated ? page.NextKeyMarker : undefined;
      versionIdMarker = page.IsTruncated ? page.NextVersionIdMarker : undefined;
    } while (keyMarker !== undefined);
    console.log(`📥 Retrieved ${matchingVersions.length} versions`);
    if (matchingVersions.length === 0) {
      return [];
    }
    // Map versions to a more user-friendly format
    const versions = await Promise.all(matchingVersions.map(async (version) => {
      try {
        // Get the object to retrieve the actual glossary ID content
        const objectResponse = await getS3Client().send(new GetObjectCommand({
          Bucket: BUCKET_NAME,
          Key: version.Key,
          VersionId: version.VersionId
        }));
        const storedGlossaryId = await objectResponse.Body.transformToString();
        return {
          versionId: version.VersionId,
          glossaryId: storedGlossaryId,
          lastModified: version.LastModified,
          isLatest: version.IsLatest,
          metadata: objectResponse.Metadata || {}
        };
      } catch (error) {
        // Best effort: one unreadable version must not hide the others.
        console.error(`❌ Error retrieving version ${version.VersionId}:`, error);
        return {
          versionId: version.VersionId,
          lastModified: version.LastModified,
          isLatest: version.IsLatest,
          error: 'Failed to retrieve version details'
        };
      }
    }));
    return versions;
  } catch (error) {
    console.error(`❌ Error getting glossary versions:`, error);
    throw error;
  }
}
/**
 * Fetches one stored version of a glossary object and reports its content.
 * The object body is returned as `glossaryId`, together with the object's
 * last-modified time and metadata.
 * @param {string} glossaryId - The glossary ID
 * @param {string} langPair - The language pair
 * @param {string} versionId - The specific version ID to retrieve
 * @param {string} name - Optional name of the glossary (not used by the lookup)
 * @returns {Promise<Object>} - The glossary version details
 * @throws Rethrows the S3 error when the version cannot be read
 */
export async function getGlossaryVersion(glossaryId, langPair, versionId, name = '') {
  const objectKey = `glossaries/${langPair}/${glossaryId}.txt`;
  try {
    console.log(`🔍 Getting version ${versionId} for glossary ${glossaryId}...`);
    const client = getS3Client();
    const versionObject = await client.send(
      new GetObjectCommand({ Bucket: BUCKET_NAME, Key: objectKey, VersionId: versionId })
    );
    const storedId = await versionObject.Body.transformToString();
    const { LastModified: lastModified, Metadata: storedMetadata } = versionObject;
    return {
      versionId,
      glossaryId: storedId,
      lastModified,
      metadata: storedMetadata || {}
    };
  } catch (failure) {
    console.error(`❌ Error getting glossary version ${versionId}:`, failure);
    throw failure;
  }
}
// Export for testing
export { getS3Client };
/**
 * Reset function for testing.
 * Clears the cached S3 client so that the next getS3Client() call constructs
 * a new one.
 * @returns {void}
 */
export function resetS3Client() {
s3Client = null;
}