s3-migrate
A CLI tool for migrating objects between S3-compatible storage buckets, with resumable progress tracking stored in a SQLite state file.
133 lines • 6 kB
JavaScript
// @ts-ignore
import { compose } from 'node:stream';
import { GetObjectCommand, ListObjectsV2Command, PutObjectCommand, } from '@aws-sdk/client-s3';
import { ProgressBar } from '@opentf/cli-pbar';
import { Command, Option } from 'commander';
import 'dotenv/config';
import prettyBytes from 'pretty-bytes';
import { open } from 'sqlite';
import sqlite3 from 'sqlite3';
import pkg from '../package.json' with { type: 'json' };
import { ChunkSizeTransform } from './chunkSize.js';
import { createS3Client } from './client.js';
// Root commander instance; the `catalog` and `copy` subcommands are registered on it below.
const program = new Command();
// Shared handle to the SQLite state database; assigned by whichever subcommand runs.
let db;
// Set to true by the SIGINT handler; polled by the copy loop for a graceful stop.
let isShuttingDown = false;
/**
 * Open (creating if necessary) the SQLite state database at the given path.
 * @param {string} stateFile - Filesystem path of the SQLite database file.
 * @returns {Promise<object>} Resolves to the opened `sqlite` database handle.
 */
async function openDatabase(stateFile) {
    const config = {
        filename: stateFile,
        driver: sqlite3.Database,
    };
    return await open(config);
}
/**
 * Enumerate every object in the source bucket (optionally under a prefix)
 * and record it in the SQLite state file so a later `copy` run can resume.
 *
 * @param {string} bucketName - Source bucket to list.
 * @param {string} stateFile - Path of the SQLite state file to create/update.
 * @param {string} [prefix] - Optional key prefix filter passed to ListObjectsV2.
 */
async function catalogObjects(bucketName, stateFile, prefix) {
    const client = createS3Client('SRC_');
    db = await openDatabase(stateFile);
    try {
        await db.exec('CREATE TABLE IF NOT EXISTS objects (key TEXT PRIMARY KEY, size INTEGER, etag TEXT, last_modified TEXT, copied INTEGER DEFAULT 0)');
        let ContinuationToken;
        do {
            const command = new ListObjectsV2Command({
                Bucket: bucketName,
                ContinuationToken,
                Prefix: prefix,
                MaxKeys: 1000,
            });
            const response = await client.send(command);
            // Contents is absent on an empty page; iterate an empty list instead.
            for (const obj of response.Contents ?? []) {
                // INSERT OR IGNORE preserves rows (and their `copied` flag) from earlier runs,
                // which is what makes the migration resumable.
                await db.run('INSERT OR IGNORE INTO objects (key, size, etag, last_modified, copied) VALUES (?, ?, ?, ?, 0)', obj.Key, obj.Size, obj.ETag, obj.LastModified?.toISOString());
            }
            ContinuationToken = response.NextContinuationToken;
        } while (ContinuationToken);
    }
    finally {
        // Close the state database even if listing fails partway through;
        // the original leaked the handle on error.
        await db.close();
    }
    console.log('Cataloging complete.');
}
/**
 * Stream every not-yet-copied object recorded in the state file from the
 * source bucket to the destination bucket, marking each row `copied = 1`
 * on success so interrupted runs can resume.
 *
 * @param {string} srcBucketName - Bucket to read objects from.
 * @param {string} destBucketName - Bucket to write objects to.
 * @param {string} stateFile - Path of the SQLite state file built by `catalog`.
 * @param {number} concurrency - Maximum number of objects copied in parallel.
 * @param {number} chunkSizeBytes - Chunk size applied by ChunkSizeTransform to each body stream.
 * @param {string} [sortBy] - Column to order copies by; restricted by commander to a whitelist.
 * @param {string} [sortOrder] - 'asc' (default) or 'desc'; restricted by commander.
 * @param {boolean} [checksumsWhenRequired] - Use the WHEN_REQUIRED checksum mode instead of WHEN_SUPPORTED.
 */
async function copyObjects(srcBucketName, destBucketName, stateFile, concurrency, chunkSizeBytes, sortBy, sortOrder, checksumsWhenRequired) {
    const checksums = checksumsWhenRequired ? 'WHEN_REQUIRED' : 'WHEN_SUPPORTED';
    db = await openDatabase(stateFile);
    const srcS3 = createS3Client('SRC_', checksums);
    const destS3 = createS3Client('DEST_', checksums);
    // Guard against NaN from a malformed --concurrency value: Math.max(1, NaN) is NaN,
    // which would make the SQL LIMIT below meaningless.
    const maxConcurrency = Number.isFinite(concurrency) ? Math.max(1, concurrency) : 1;
    console.log(`Using concurrency level: ${maxConcurrency}`);
    const { count: totalObjects } = (await db.get('SELECT COUNT(*) AS count FROM objects WHERE copied = 0')) || { count: 0 };
    if (totalObjects === 0) {
        console.log('Nothing to copy.');
        // Don't leak the handle on the early-return path (the original did).
        await db.close();
        return;
    }
    // SUM(size) is NULL for an empty result set, and the original's fallback object
    // used the wrong key ({ count: 0 }), leaving totalBytes undefined. Coalesce to 0
    // so prettyBytes() below never receives a non-number.
    const sumRow = await db.get('SELECT SUM(size) AS totalBytes FROM objects WHERE copied = 0');
    const totalBytes = sumRow?.totalBytes ?? 0;
    console.log(`Copying ${totalObjects} objects...`);
    const bar = new ProgressBar();
    bar.start({ total: totalObjects });
    let completed = 0;
    let copiedBytes = 0;
    // sortBy/sortOrder are interpolated into SQL, but commander constrains both to
    // fixed choice lists (see the `copy` command definition), so injection is not possible.
    let orderByClause = '';
    if (sortBy) {
        orderByClause = `ORDER BY ${sortBy} ${sortOrder || 'asc'}`;
    }
    // Keys that failed to copy during this run. They stay `copied = 0` in the DB,
    // so without excluding them the batch loop would re-select and retry the same
    // failing objects forever (infinite loop in the original).
    const failedKeys = new Set();
    /** Copy one object; on success mark it copied and advance the progress bar. */
    async function copyObject(key, size) {
        if (isShuttingDown) {
            return;
        }
        try {
            const getCommand = new GetObjectCommand({
                Bucket: srcBucketName,
                Key: key,
            });
            const response = await srcS3.send(getCommand);
            // Re-chunk the source stream so the destination receives uniformly sized chunks.
            const dataStream = compose(response.Body, new ChunkSizeTransform(chunkSizeBytes));
            const putCommand = new PutObjectCommand({
                Bucket: destBucketName,
                Key: key,
                Body: dataStream,
                ContentLength: size,
            });
            await destS3.send(putCommand);
            await db.run('UPDATE objects SET copied = 1 WHERE key = ?', key);
            completed++;
            copiedBytes += size;
            bar.update({
                value: completed,
                suffix: `(${prettyBytes(copiedBytes)}/${prettyBytes(totalBytes)})`,
            });
        }
        catch (error) {
            failedKeys.add(key);
            console.error(`Error copying ${key}:`, error);
        }
    }
    try {
        while (!isShuttingDown) {
            // Exclude keys that already failed so every batch makes forward progress.
            const excluded = [...failedKeys];
            const notInClause = excluded.length > 0
                ? `AND key NOT IN (${excluded.map(() => '?').join(', ')})`
                : '';
            const objects = await db.all(`SELECT key, size FROM objects WHERE copied = 0 ${notInClause} ${orderByClause} LIMIT ?`, ...excluded, maxConcurrency);
            if (objects.length === 0)
                break;
            await Promise.all(objects.map((obj) => copyObject(obj.key, obj.size)));
        }
    }
    finally {
        // Always release the progress bar and the database, even if a batch throws.
        bar.stop();
        await db.close();
    }
    if (failedKeys.size > 0) {
        console.error(`${failedKeys.size} object(s) failed to copy and remain marked as uncopied.`);
    }
    console.log(isShuttingDown ? 'Copy interrupted.' : 'Copy process completed.');
}
// On Ctrl-C, request a graceful stop: the copy loop checks this flag, so
// in-flight transfers finish and the state file stays consistent. The handler
// only sets the flag; it does not force the process to exit.
process.on('SIGINT', () => {
    isShuttingDown = true;
});
// `catalog` subcommand: enumerate the source bucket into the SQLite state file.
program
    .command('catalog')
    .requiredOption('--src-bucket-name <name>', 'Source bucket name')
    .requiredOption('--state-file <path>', 'Path to SQLite state file')
    .option('--prefix <prefix>', 'Prefix to filter objects by')
    .action((options) => catalogObjects(options.srcBucketName, options.stateFile, options.prefix));
// `copy` subcommand: stream objects recorded in the state file to the destination bucket.
program
    .command('copy')
    .requiredOption('--src-bucket-name <name>', 'Source bucket name')
    .requiredOption('--dest-bucket-name <name>', 'Destination bucket name')
    .requiredOption('--state-file <path>', 'Path to SQLite state file')
    .option('--concurrency <number>', 'Max concurrent copies', '8')
    .option('--chunk-size-bytes <number>', 'Size of each chunk in bytes', '2097152')
    .addOption(new Option('--sort-by <field>', 'Field to sort by (key, size, etag, last_modified)').choices(['key', 'size', 'etag', 'last_modified']))
    .addOption(new Option('--sort-order <order>', 'Sort order (asc, desc)').choices([
    'asc',
    'desc',
]))
    .option('--checksums-when-required', 'Use checksums when required (can be useful if your copy fails with a `XAmzContentSHA256Mismatch` error)')
    // Parse numeric options with an explicit radix of 10; parseInt without a radix
    // is error-prone for strings with unusual prefixes.
    .action(({ srcBucketName, destBucketName, stateFile, concurrency, chunkSizeBytes, sortBy, sortOrder, checksumsWhenRequired, }) => copyObjects(srcBucketName, destBucketName, stateFile, Number.parseInt(concurrency, 10), Number.parseInt(chunkSizeBytes, 10), sortBy, sortOrder, checksumsWhenRequired));
program.version(pkg.version).parse(process.argv);
//# sourceMappingURL=index.js.map