UNPKG

sanity

Version:

Sanity is a real-time content infrastructure with a scalable, hosted backend featuring a Graph Oriented Query Language (GROQ), asset pipelines and fast edge caches

287 lines (250 loc) 10.4 kB
import {createWriteStream, existsSync, mkdirSync} from 'node:fs'
import {mkdtemp} from 'node:fs/promises'
import {tmpdir} from 'node:os'
import path from 'node:path'
import {finished} from 'node:stream/promises'

import {
  type CliCommandArguments,
  type CliCommandContext,
  type CliCommandDefinition,
  type SanityClient,
} from '@sanity/cli'
import {absolutify} from '@sanity/util/fs'
import {Mutex} from 'async-mutex'
import createDebug from 'debug'
import {isString} from 'lodash'
import prettyMs from 'pretty-ms'
import {hideBin} from 'yargs/helpers'
import yargs from 'yargs/yargs'

import archiveDir from '../../actions/backup/archiveDir'
import chooseBackupIdPrompt from '../../actions/backup/chooseBackupIdPrompt'
import cleanupTmpDir from '../../actions/backup/cleanupTmpDir'
import downloadAsset from '../../actions/backup/downloadAsset'
import downloadDocument from '../../actions/backup/downloadDocument'
import {type File, PaginatedGetBackupStream} from '../../actions/backup/fetchNextBackupPage'
import parseApiErr from '../../actions/backup/parseApiErr'
import newProgress from '../../actions/backup/progressSpinner'
import resolveApiClient from '../../actions/backup/resolveApiClient'
import humanFileSize from '../../util/humanFileSize'
import isPathDirName from '../../util/isPathDirName'
import {defaultApiVersion} from './backupGroup'

const debug = createDebug('sanity:backup')

/** Concurrency used when `--concurrency` is not passed. */
const DEFAULT_DOWNLOAD_CONCURRENCY = 10
/** Upper bound accepted for `--concurrency`. */
const MAX_DOWNLOAD_CONCURRENCY = 24
/** Number of columns between the left and right border of the summary box. */
const BOX_INNER_WIDTH = 59

/** Fully resolved settings for a single backup download. */
interface DownloadBackupOptions {
  projectId: string
  datasetName: string
  token: string
  backupId: string
  /** Directory part of the output path; `outFileName` is empty when the user cancelled. */
  outDir: string
  /** File name part of the output path; empty when the user cancelled. */
  outFileName: string
  overwrite: boolean
  /** Number of backup items downloaded in parallel. */
  concurrency: number
}

const helpText = `
Options
  --backup-id <string> The backup ID to download. (required)
  --out <string>       The file or directory path the backup should download to.
  --overwrite          Allows overwriting of existing backup file.
  --concurrency <num>  Concurrent number of backup item downloads. (max: ${MAX_DOWNLOAD_CONCURRENCY})

Examples
  sanity backup download DATASET_NAME --backup-id 2024-01-01-backup-1
  sanity backup download DATASET_NAME --backup-id 2024-01-01-backup-2 --out /path/to/file
  sanity backup download DATASET_NAME --backup-id 2024-01-01-backup-3 --out /path/to/file --overwrite
`

/**
 * Parses the flags understood by `sanity backup download`.
 *
 * @param args - Command arguments; `args.argv` is used when set, otherwise `process.argv`.
 * @returns The yargs result with `backup-id`, `out`, `concurrency` and `overwrite`.
 */
function parseCliFlags(args: {argv?: string[]}) {
  return yargs(hideBin(args.argv || process.argv).slice(2))
    .options('backup-id', {type: 'string'})
    .options('out', {type: 'string'})
    .options('concurrency', {type: 'number', default: DEFAULT_DOWNLOAD_CONCURRENCY})
    .options('overwrite', {type: 'boolean', default: false}).argv
}

/**
 * `sanity backup download [DATASET_NAME]`
 *
 * Lists every file of the backup, downloads documents and assets into a fresh temporary
 * directory, archives that directory to the requested output path and finally removes the
 * temporary directory.
 */
const downloadBackupCommand: CliCommandDefinition = {
  name: 'download',
  group: 'backup',
  signature: '[DATASET_NAME]',
  description: 'Download a dataset backup to a local file.',
  helpText,
  // eslint-disable-next-line max-statements
  action: async (args, context) => {
    const {output, chalk} = context
    const [client, opts] = await prepareBackupOptions(context, args)
    const {projectId, datasetName, backupId, outDir, outFileName} = opts

    // If any of the output path or file name is empty, cancel the operation.
    if (outDir === '' || outFileName === '') {
      output.print('Operation cancelled.')
      return
    }

    const outFilePath = path.join(outDir, outFileName)

    // Pad the raw value *before* colouring it, so that the width of the row does not depend on
    // the invisible ANSI escape codes (or on colours being enabled at all).
    const boxRow = (label: string, value: string): string => {
      // 4 = leading space + ": " + trailing space.
      const paddedValue = value.padEnd(BOX_INNER_WIDTH - label.length - 4)
      return `│ ${chalk.bold(label)}: ${chalk.cyan(paddedValue)} │`
    }
    const emptyBoxRow = `│${' '.repeat(BOX_INNER_WIDTH)}│`

    output.print(`╭${'─'.repeat(BOX_INNER_WIDTH)}╮`)
    output.print(emptyBoxRow)
    output.print(`│${' Downloading backup for:'.padEnd(BOX_INNER_WIDTH)}│`)
    output.print(boxRow('projectId', projectId))
    output.print(boxRow('dataset', datasetName))
    output.print(boxRow('backupId', backupId))
    output.print(emptyBoxRow)
    output.print(`╰${'─'.repeat(BOX_INNER_WIDTH)}╯`)
    output.print('')
    output.print(`Downloading backup to "${chalk.cyan(outFilePath)}"`)

    const start = Date.now()
    const progressSpinner = newProgress(output, 'Setting up backup environment...')

    // Create a unique temporary directory to store files before bundling them into the archive at outputPath.
    // Temporary directories are normally deleted at the end of backup process, any unexpected exit may leave them
    // behind, hence it is important to create a unique directory for each attempt.
    const tmpOutDir = await mkdtemp(path.join(tmpdir(), `sanity-backup-`))

    // Create required directories if they don't exist.
    for (const dir of [outDir, path.join(tmpOutDir, 'images'), path.join(tmpOutDir, 'files')]) {
      mkdirSync(dir, {recursive: true})
    }

    debug('Writing to temporary directory %s', tmpOutDir)
    const tmpOutDocumentsFile = path.join(tmpOutDir, 'data.ndjson')

    // Handle concurrent writes to the same file using mutex.
    const docOutStream = createWriteStream(tmpOutDocumentsFile)
    const docWriteMutex = new Mutex()

    try {
      const backupFileStream = new PaginatedGetBackupStream(
        client,
        opts.projectId,
        opts.datasetName,
        opts.backupId,
        opts.token,
      )

      // Collect the complete file listing first; downloads start once it is known.
      const files: File[] = []
      for await (const file of backupFileStream) {
        files.push(file)
        progressSpinner.set({
          step: `Reading backup files...`,
          update: true,
          current: files.length,
          total: backupFileStream.totalFiles,
        })
      }

      let totalItemsDownloaded = 0
      // This is dynamically imported because this module is ESM only and this file gets compiled to CJS at this time.
      const {default: pMap} = await import('p-map')
      await pMap(
        files,
        async (file: File) => {
          if (file.type === 'file' || file.type === 'image') {
            await downloadAsset(file.url, file.name, file.type, tmpOutDir)
          } else {
            // Everything that is not an asset is appended as one line of data.ndjson.
            const doc = await downloadDocument(file.url)
            await docWriteMutex.runExclusive(() => {
              docOutStream.write(`${doc}\n`)
            })
          }

          totalItemsDownloaded += 1
          progressSpinner.set({
            step: `Downloading documents and assets...`,
            update: true,
            current: totalItemsDownloaded,
            total: backupFileStream.totalFiles,
          })
        },
        {concurrency: opts.concurrency},
      )
    } catch (error) {
      progressSpinner.fail()
      // Release the file descriptor of the partially written documents file before bailing out.
      docOutStream.destroy()
      const {message} = parseApiErr(error)
      throw new Error(`Downloading dataset backup failed: ${message}`)
    }

    // Flush and close the documents file before it gets archived.
    docOutStream.end()
    await finished(docOutStream)

    progressSpinner.set({step: `Archiving files into a tarball...`, update: true})
    try {
      await archiveDir(tmpOutDir, outFilePath, (processedBytes: number) => {
        progressSpinner.update({
          step: `Archiving files into a tarball, ${humanFileSize(processedBytes)} bytes written...`,
        })
      })
    } catch (err) {
      progressSpinner.fail()
      // The caught value is not guaranteed to be an Error instance.
      const reason = err instanceof Error ? err.message : String(err)
      throw new Error(`Archiving backup failed: ${reason}`)
    }

    progressSpinner.set({
      step: `Cleaning up temporary files at ${chalk.cyan(`${tmpOutDir}`)}`,
    })
    cleanupTmpDir(tmpOutDir)

    progressSpinner.set({
      step: `Backup download complete [${prettyMs(Date.now() - start)}]`,
    })
    progressSpinner.succeed()
  },
}

/**
 * Validates the backup options given on the command line and prepares the API client together
 * with the resolved {@link DownloadBackupOptions}.
 *
 * Prompts for the backup ID and the output path when they are not passed as flags, and asks for
 * confirmation before overwriting an existing file unless `--overwrite` is set.
 *
 * @param context - CLI context; its `prompt` and `workDir` are used here.
 * @param args - CLI arguments; the first positional argument is the dataset name.
 * @returns The client and the resolved options. When the user declines to overwrite an existing
 *   file, `outFileName` is an empty string, which the caller must treat as a cancellation.
 * @throws Error when the token, dataset name, backup ID or concurrency is invalid.
 */
async function prepareBackupOptions(
  context: CliCommandContext,
  args: CliCommandArguments,
): Promise<[SanityClient, DownloadBackupOptions]> {
  const flags = await parseCliFlags(args)
  const [dataset] = args.argsWithoutOptions
  const {prompt, workDir} = context
  const {projectId, datasetName, client} = await resolveApiClient(
    context,
    dataset,
    defaultApiVersion,
  )

  const {token} = client.config()
  if (!isString(token) || token.length < 1) {
    throw new Error(`token is missing`)
  }

  if (!isString(datasetName) || datasetName.length < 1) {
    throw new Error(`dataset ${datasetName} must be a valid dataset name`)
  }

  const backupId = String(flags['backup-id'] || (await chooseBackupIdPrompt(context, datasetName)))
  if (backupId.length < 1) {
    throw new Error(`backup-id ${flags['backup-id']} should be a valid string`)
  }

  // Reject non-integers (including NaN from a non-numeric flag value) up front: p-map only
  // accepts integer concurrency and would otherwise fail after the download was already set up.
  const {concurrency} = flags
  if (!Number.isInteger(concurrency)) {
    throw new Error(`concurrency should be an integer`)
  }
  if (concurrency < 1 || concurrency > MAX_DOWNLOAD_CONCURRENCY) {
    throw new Error(`concurrency should be in 1 to ${MAX_DOWNLOAD_CONCURRENCY} range`)
  }

  const defaultOutFileName = `${datasetName}-backup-${backupId}.tar.gz`
  let out = await (async (): Promise<string> => {
    if (flags.out !== undefined) {
      // Rewrite the output path to an absolute path, if it is not already.
      return absolutify(flags.out)
    }

    const input = await prompt.single({
      type: 'input',
      message: 'Output path:',
      default: path.join(workDir, defaultOutFileName),
      filter: absolutify,
    })
    return input
  })()

  // If path is a directory name, then add a default file name to the path.
  if (isPathDirName(out)) {
    out = path.join(out, defaultOutFileName)
  }

  // If the file already exists, ask for confirmation if it should be overwritten.
  if (!flags.overwrite && existsSync(out)) {
    const shouldOverwrite = await prompt.single({
      type: 'confirm',
      message: `File "${out}" already exists, would you like to overwrite it?`,
      default: false,
    })

    // If the user does not want to overwrite the file, set the output path to an empty string.
    // This should be handled by the caller of this function as cancel operation.
    if (!shouldOverwrite) {
      out = ''
    }
  }

  return [
    client,
    {
      projectId,
      datasetName,
      backupId,
      token,
      outDir: path.dirname(out),
      outFileName: path.basename(out),
      overwrite: flags.overwrite,
      concurrency,
    },
  ]
}

export default downloadBackupCommand