@sanity/export

Export Sanity documents and assets

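A minimal usage sketch (not part of the module source below), assuming a configured @sanity/client instance and option names taken from the fields the code reads (outputPath, assets, raw, drafts, types, compress, onProgress). The full set of accepted and required options is determined by validateOptions in ./options.js, which is not shown here, so treat the exact shape as an assumption:

import { createClient } from '@sanity/client';
import { exportDataset } from '@sanity/export';

// Hypothetical project configuration -- replace with real values.
const client = createClient({
  projectId: 'your-project-id',
  dataset: 'production',
  apiVersion: '2024-01-01',
  token: process.env.SANITY_AUTH_TOKEN,
  useCdn: false,
});

const result = await exportDataset({
  client,
  dataset: 'production', // assumed; getSource(opts) derives the export source from the options
  outputPath: './production-export.tar.gz', // may also be a writable stream (see the isWritableStream check below)
  assets: true, // download assets and rewrite references (assetHandler.rewriteAssets)
  raw: false, // raw: true skips asset rewriting entirely
  drafts: false, // passed to filterDocuments()
  types: undefined, // or e.g. ['post', 'author'], passed to filterDocumentTypes()
  compress: true, // toggles the gzip level used for the tar archive
  onProgress: ({ step, current, total }) => {
    // Called with the same shape the code emits: step, current, total, update
    console.log(step, current ?? '', total ?? '');
  },
});

// The promise resolves with the counters the function collects along the way.
console.log(result.documentCount, result.assetCount, result.outputPath);
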
import { createWriteStream } from 'node:fs';
import { mkdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join as joinPath } from 'node:path';
import { PassThrough } from 'node:stream';
import { finished, pipeline } from 'node:stream/promises';
import { deprecate } from 'node:util';
import { constants as zlib } from 'node:zlib';
import archiver from 'archiver';
import { JsonStreamStringify } from 'json-stream-stringify';
import { isWritableStream, split, throughObj } from './util/streamHelpers.js';
import { AssetHandler } from './AssetHandler.js';
import { DOCUMENT_STREAM_DEBUG_INTERVAL, MODE_STREAM } from './constants.js';
import { debug } from './debug.js';
import { filterDocuments } from './filterDocuments.js';
import { filterDocumentTypes } from './filterDocumentTypes.js';
import { getDocumentCursorStream } from './getDocumentCursorStream.js';
import { getDocumentsStream } from './getDocumentsStream.js';
import { logFirstChunk } from './logFirstChunk.js';
import { rejectOnApiError } from './rejectOnApiError.js';
import { stringifyStream } from './stringifyStream.js';
import { tryParseJson } from './tryParseJson.js';
import { getSource, validateOptions } from './options.js';

const noop = () => null;

export async function exportDataset(opts) {
  const options = validateOptions(opts);
  const onProgress = options.onProgress ?? noop;

  const archive = archiver('tar', {
    gzip: true,
    gzipOptions: {
      level: options.compress ? zlib.Z_DEFAULT_COMPRESSION : zlib.Z_NO_COMPRESSION,
    },
  });

  archive.on('warning', (err) => {
    debug('Archive warning: %s', err.message);
  });

  archive.on('entry', (entry) => {
    debug('Adding archive entry: %s', entry.name);
  });

  const slugDate = new Date()
    .toISOString()
    .replace(/[^a-z0-9]/gi, '-')
    .toLowerCase();

  const source = getSource(opts);
  const prefix = `${source.id}-export-${slugDate}`;
  const tmpDir = joinPath(tmpdir(), prefix);
  await mkdir(tmpDir, { recursive: true });

  const dataPath = joinPath(tmpDir, 'data.ndjson');
  const assetsPath = joinPath(tmpDir, 'assets.json');

  const cleanup = () =>
    rm(tmpDir, { recursive: true, force: true }).catch((err) => {
      debug(`Error while cleaning up temporary files: ${err instanceof Error ? err.message : err}`);
      return false;
    });

  const assetHandler = new AssetHandler({
    client: options.client,
    tmpDir,
    prefix,
    ...(options.assetConcurrency !== undefined && { concurrency: options.assetConcurrency }),
    ...(options.retryDelayMs !== undefined && { retryDelayMs: options.retryDelayMs }),
    maxRetries: options.maxAssetRetries,
  });

  debug('Downloading assets (temporarily) to %s', tmpDir);
  debug('Downloading to %s', isWritableStream(options.outputPath) ? 'stream' : options.outputPath);

  const outputStream = isWritableStream(options.outputPath)
    ? options.outputPath
    : createWriteStream(options.outputPath);

  let assetStreamHandler = assetHandler.noop;
  if (!options.raw) {
    assetStreamHandler = options.assets ? assetHandler.rewriteAssets : assetHandler.stripAssets;
  }

  let resolve;
  let reject;
  const result = new Promise((res, rej) => {
    resolve = res;
    reject = rej;
  });

  finished(archive)
    .then(() => {
      debug('Archive finished');
    })
    .catch(async (archiveErr) => {
      const err = archiveErr instanceof Error ? archiveErr : new Error(`${archiveErr}`);
      debug('Archiving errored: %s', err.stack);
      // Try cleanup, but let original error be the main rejection reason, not the cleanup
      await cleanup().catch(noop);
      reject(err);
    });

  debug('Getting dataset export stream, mode: "%s"', options.mode);
  onProgress({ step: 'Exporting documents...' });

  let documentCount = 0;
  let lastDocumentID = null;
  let lastReported = Date.now();

  const reportDocumentCount = (doc, _enc, cb) => {
    ++documentCount;
    const now = Date.now();
    // We report to the `onProgress` handler every 50 ms.
    // It's up to the caller to not do too much expensive work.
    if (now - lastReported > 50) {
      onProgress({
        step: 'Exporting documents...',
        current: documentCount,
        total: '?',
        update: true,
      });
      lastReported = now;
    }
    lastDocumentID = doc._id;
    cb(null, doc);
  };

  const inputStream = await getDocumentInputStream(options);
  if ('statusCode' in inputStream) {
    debug('Got HTTP %d', inputStream.statusCode);
  }
  if ('headers' in inputStream) {
    debug('Response headers: %o', inputStream.headers);
  }

  let debugTimer = null;
  function scheduleDebugTimer() {
    debugTimer = setTimeout(() => {
      debug('Still streaming documents', {
        documentCount,
        lastDocumentID,
      });
      // Schedule another tick:
      scheduleDebugTimer();
    }, DOCUMENT_STREAM_DEBUG_INTERVAL);
  }
  scheduleDebugTimer();

  const filterTransform = throughObj((doc, _enc, callback) => {
    try {
      const include = options.filterDocument(doc);
      if (include) {
        callback(null, doc);
      } else {
        callback();
      }
    } catch (err) {
      callback(err instanceof Error ? err : new Error(`${err}`));
    }
  });

  const transformTransform = throughObj((doc, _enc, callback) => {
    try {
      callback(null, options.transformDocument(doc));
    } catch (err) {
      callback(err instanceof Error ? err : new Error(`${err}`));
    }
  });

  const reportTransform = throughObj(reportDocumentCount);

  // Use pipeline to chain streams with proper error handling
  const jsonStream = new PassThrough();
  finished(jsonStream)
    .then(() => debug('JSON stream finished'))
    .catch((err) => reject(err instanceof Error ? err : new Error(`${err}`)));

  pipeline(
    inputStream,
    logFirstChunk(),
    split(tryParseJson),
    rejectOnApiError(),
    filterDocuments(options.drafts),
    filterDocumentTypes(options.types),
    assetStreamHandler,
    filterTransform,
    transformTransform,
    reportTransform,
    stringifyStream(),
    jsonStream
  ).catch((err) => {
    if (debugTimer !== null) clearTimeout(debugTimer);
    debug(`Export stream error @ ${lastDocumentID}/${documentCount}: `, err);
    reject(err instanceof Error ? err : new Error(`${err}`));
  });

  pipeline(jsonStream, createWriteStream(dataPath))
    .then(async () => {
      if (debugTimer !== null) clearTimeout(debugTimer);
      debug('Export stream completed');
      onProgress({
        step: 'Exporting documents...',
        current: documentCount,
        total: documentCount,
        update: true,
      });

      debug('Adding data.ndjson to archive');
      archive.file(dataPath, { name: 'data.ndjson', prefix });

      if (!options.raw && options.assets) {
        onProgress({ step: 'Downloading assets...' });
      }

      let prevCompleted = 0;
      const progressInterval = setInterval(() => {
        const completed = assetHandler.queueSize - assetHandler.queue.size - assetHandler.queue.pending;
        if (prevCompleted === completed) {
          return;
        }
        prevCompleted = completed;
        onProgress({
          step: 'Downloading assets...',
          current: completed,
          total: assetHandler.queueSize,
          update: true,
        });
      }, 500);

      debug('Waiting for asset handler to complete downloads');
      try {
        const assetMap = await assetHandler.finish();

        // Make sure we mark the progress as done (eg 100/100 instead of 99/100)
        onProgress({
          step: 'Downloading assets...',
          current: assetHandler.queueSize,
          total: assetHandler.queueSize,
          update: true,
        });

        const assetsStream = createWriteStream(assetsPath);
        await pipeline(new JsonStreamStringify(assetMap), assetsStream);
        if (options.assetsMap) {
          archive.file(assetsPath, { name: 'assets.json', prefix });
        }
        clearInterval(progressInterval);
      } catch (assetErr) {
        clearInterval(progressInterval);
        await cleanup().catch(noop); // Try to clean up, but ignore errors here
        reject(assetErr instanceof Error ? assetErr : new Error(`${assetErr}`));
        return;
      }

      // Add all downloaded assets to archive
      archive.directory(joinPath(tmpDir, 'files'), `${prefix}/files`);
      archive.directory(joinPath(tmpDir, 'images'), `${prefix}/images`);

      debug('Finalizing archive, flushing streams');
      onProgress({ step: 'Adding assets to archive...' });
      await archive.finalize();
    })
    .catch(async (err) => {
      if (debugTimer !== null) clearTimeout(debugTimer);
      debug(`Export stream error @ ${lastDocumentID}/${documentCount}: `, err);
      await cleanup().catch(noop);
      reject(err instanceof Error ? err : new Error(`${err}`));
    });

  pipeline(archive, outputStream)
    .then(() => onComplete())
    .catch(onComplete);

  async function onComplete(err) {
    onProgress({ step: 'Clearing temporary files...' });
    await cleanup();

    if (!err) {
      debug('Export completed');
      resolve({
        outputPath: options.outputPath,
        documentCount,
        assetCount: assetHandler.filesWritten,
      });
      return;
    }

    debug('Error during streaming: %s', err.stack);
    assetHandler.clear();
    reject(err);
  }

  return result;
}

function getDocumentInputStream(options) {
  return options.mode === MODE_STREAM ? getDocumentsStream(options) : getDocumentCursorStream(options);
}

export async function exportMediaLibrary(options) {
  return exportDataset(options);
}

/**
 * Alias for `exportDataset`, for backwards compatibility.
 * Use named `exportDataset` instead.
 *
 * @deprecated Default export is deprecated and will be removed in a future release. Use named "exportDataset" function instead.
 * @public
 */
export default deprecate(
  function deprecatedExport(opts) {
    return exportDataset(opts);
  },
  `Default export of "@sanity/export" is deprecated and will be removed in a future release. Please use the named "exportDataset" function instead.`,
  'DEP_SANITY_EXPORT_DEFAULT'
);
//# sourceMappingURL=export.js.map
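
A migration note, based on the deprecation wrapper at the end of the file: the default export still works, but util.deprecate makes it log a DEP_SANITY_EXPORT_DEFAULT warning the first time it is called, so new code should import the named function. A sketch of the change, where opts stands for the same options object shown in the usage sketch near the top of this page:

// Before (deprecated): the default export is deprecatedExport(), a thin wrapper around exportDataset()
// import sanityExport from '@sanity/export';
// await sanityExport(opts);

// After: import the named function directly; options and return value are unchanged
import { exportDataset } from '@sanity/export';
await exportDataset(opts);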