UNPKG

@sanity/export

Version:

Export Sanity documents and assets

384 lines 15.2 kB
import { createHash } from 'node:crypto';
import { createWriteStream, mkdirSync } from 'node:fs';
import { join as joinPath } from 'node:path';
import { pipeline } from 'node:stream/promises';
import PQueue from 'p-queue';
import { delay } from './util/delay.js';
import { through, throughObj } from './util/streamHelpers.js';
import {
  ASSET_DOWNLOAD_CONCURRENCY,
  ASSET_DOWNLOAD_MAX_RETRIES,
  DEFAULT_RETRY_DELAY,
} from './constants.js';
import { debug } from './debug.js';
import { getUserAgent } from './getUserAgent.js';
import { requestStream } from './requestStream.js';
import { rm } from 'node:fs/promises';

// Asset-document properties that are NOT copied into the exported asset map.
const EXCLUDE_PROPS = ['_id', '_type', 'assetId', 'extension', 'mimeType', 'path', 'url'];
const ACTION_REMOVE = 'remove';
const ACTION_REWRITE = 'rewrite';

/**
 * Handles assets during an export: downloads referenced asset files to a local
 * temporary directory and rewrites documents to hold placeholder references
 * (`_sanityAsset: 'image@file://./images/<hash>.<ext>'`), or strips/skips asset
 * documents entirely depending on the export mode.
 *
 * Downloads run through a concurrency-limited queue (p-queue) and are retried
 * on transient failures; 401/403/404 responses are warned about and ignored.
 */
export class AssetHandler {
  client;
  tmpDir;
  assetDirsCreated;
  downloading;
  assetsSeen;
  assetMap;
  filesWritten;
  queueSize;
  maxRetries;
  retryDelayMs;
  queue;
  rejectedError;
  reject;

  /**
   * @param {object} options
   * @param {object} options.client - Sanity client (used for its configured token).
   * @param {string} options.tmpDir - Directory to write downloaded assets into.
   * @param {number} [options.concurrency] - Max parallel downloads.
   * @param {number} [options.maxRetries] - Download attempts per asset.
   * @param {number} [options.retryDelayMs] - Delay between attempts.
   * @param {PQueue} [options.queue] - Injectable queue (mainly for tests).
   */
  constructor(options) {
    const concurrency = options.concurrency ?? ASSET_DOWNLOAD_CONCURRENCY;
    debug('Using asset download concurrency of %d', concurrency);
    this.client = options.client;
    this.tmpDir = options.tmpDir;
    this.assetDirsCreated = false;
    this.downloading = [];
    this.assetsSeen = new Map();
    this.assetMap = {};
    this.filesWritten = 0;
    this.queueSize = 0;
    this.maxRetries = options.maxRetries ?? ASSET_DOWNLOAD_MAX_RETRIES;
    this.retryDelayMs = options.retryDelayMs;
    this.queue = options.queue ?? new PQueue({ concurrency });
    this.rejectedError = null;
    // Until finish() installs a real rejection handler, remember the first
    // error so finish() can reject immediately when it is eventually called.
    this.reject = (err) => {
      this.rejectedError = err;
    };
  }

  /** Drops all queued downloads and bookkeeping (does not abort in-flight tasks). */
  clear() {
    this.assetsSeen.clear();
    this.queue.clear();
    this.queueSize = 0;
  }

  /**
   * Resolves with the accumulated asset map once the download queue is idle,
   * or rejects with the first download error (including one recorded before
   * finish() was called).
   * @returns {Promise<object>}
   */
  finish() {
    return new Promise((resolve, reject) => {
      if (this.rejectedError) {
        reject(this.rejectedError);
        return;
      }
      this.reject = reject;
      void this.queue.onIdle().then(() => resolve(this.assetMap));
    });
  }

  // Called when we want to download all assets to local filesystem and rewrite documents to hold
  // placeholder asset references (_sanityAsset: 'image@file:///local/path')
  rewriteAssets = throughObj((doc, _enc, callback) => {
    if (['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type)) {
      const assetDoc = doc;
      const type = doc._type === 'sanity.imageAsset' ? 'image' : 'file';
      const filePath = `${type}s/${generateFilename(doc._id)}`;
      this.assetsSeen.set(doc._id, type);
      this.queueAssetDownload(assetDoc, filePath);
      callback();
      return;
    }
    callback(null, this.findAndModify(doc, ACTION_REWRITE));
  });

  // Called in the case where we don't _want_ assets, so basically just remove all asset documents
  // as well as references to assets (*.asset._ref ^= (image|file)-)
  stripAssets = throughObj((doc, _enc, callback) => {
    if (['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type)) {
      callback();
      return;
    }
    callback(null, this.findAndModify(doc, ACTION_REMOVE));
  });

  // Called when we are using raw export mode along with `assets: false`, where we simply
  // want to skip asset documents but retain asset references (useful for data mangling)
  skipAssets = throughObj((doc, _enc, callback) => {
    const isAsset = ['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type);
    if (isAsset) {
      callback();
      return;
    }
    callback(null, doc);
  });

  // Pass-through transform used when no asset handling is needed.
  noop = throughObj((doc, _enc, callback) => callback(null, doc));

  /**
   * Queues a download of the given asset document to `dstPath` (relative to
   * tmpDir), retrying transient failures up to `maxRetries` times. 4xx client
   * errors are not retried; 401/403/404 are warned about and treated as success.
   */
  queueAssetDownload(assetDoc, dstPath) {
    if (!assetDoc.url) {
      debug('Asset document "%s" does not have a URL property, skipping', assetDoc._id);
      return;
    }
    debug('Adding download task for %s (destination: %s)', assetDoc._id, dstPath);
    this.queueSize++;
    this.downloading.push(assetDoc.url);
    const doDownload = async () => {
      let dlError;
      for (let attempt = 0; attempt < this.maxRetries; attempt++) {
        try {
          return await this.downloadAsset(assetDoc, dstPath);
        } catch (err) {
          const downloadError = err;
          // Ignore inaccessible assets
          switch (downloadError.statusCode) {
            case 401:
            case 403:
            case 404:
              console.warn(`⚠ Asset failed with HTTP %d (ignoring): %s`, downloadError.statusCode, assetDoc._id);
              return true;
            default:
          }
          debug(`Error downloading asset %s (destination: %s), attempt %d`, assetDoc._id, dstPath, attempt, err);
          dlError = downloadError;
          if (downloadError.statusCode && downloadError.statusCode >= 400 && downloadError.statusCode < 500) {
            // Don't retry on client errors
            break;
          }
          await delay(this.retryDelayMs ?? DEFAULT_RETRY_DELAY);
        }
      }
      // Preserve the underlying error for callers inspecting `cause`.
      throw new Error(dlError?.message ?? 'Unknown error downloading asset', { cause: dlError });
    };
    this.queue
      .add(() =>
        doDownload().catch((err) => {
          debug('Failed to download the asset, aborting download', err);
          this.queue.clear();
          this.reject(err instanceof Error ? err : new Error(String(err)));
        }),
      )
      .catch((error) => {
        debug('Queued task failed', error);
      });
  }

  /** Lazily creates the `files/` and `images/` directories under tmpDir. */
  maybeCreateAssetDirs() {
    if (this.assetDirsCreated) {
      return;
    }
    mkdirSync(joinPath(this.tmpDir, 'files'), { recursive: true });
    mkdirSync(joinPath(this.tmpDir, 'images'), { recursive: true });
    this.assetDirsCreated = true;
  }

  /**
   * Builds the URL and headers for fetching an asset. For images hosted on the
   * Sanity CDN (or the test host) a bearer token and `dlRaw=true` are added so
   * the original, unprocessed file is returned.
   * @returns {{url: string, headers: object}}
   */
  getAssetRequestOptions(assetDoc) {
    const token = this.client.config().token;
    const headers = { 'User-Agent': getUserAgent() };
    const isImage = assetDoc._type === 'sanity.imageAsset';
    const url = URL.parse(assetDoc.url ?? '');
    // If we can't parse it, return as-is
    if (!url) {
      return { url: assetDoc.url ?? '', headers };
    }
    if (isImage &&
      token &&
      (url.hostname === 'cdn.sanity.io' ||
        url.hostname === 'cdn.sanity.work' ||
        // used in tests. use a very specific port to avoid conflicts
        url.host === 'localhost:43216')) {
      headers.Authorization = `Bearer ${token}`;
      url.searchParams.set('dlRaw', 'true');
    }
    return { url: url.toString(), headers };
  }

  /**
   * Downloads a single asset to `dstPath` under tmpDir, verifying the written
   * file against the `x-sanity-sha1`/`x-sanity-md5` response headers. An error
   * is thrown (and the partial file removed) only when BOTH provided hashes
   * mismatch, since server-side processing (e.g. svg sanitization) can change
   * the sha1 alone.
   * @returns {Promise<true>} resolves true on success.
   */
  async downloadAsset(assetDoc, dstPath) {
    const { url } = assetDoc;
    debug('Downloading asset %s', url);
    const options = this.getAssetRequestOptions(assetDoc);
    let stream;
    try {
      stream = await requestStream({
        maxRetries: 0, // We handle retries ourselves in queueAssetDownload
        ...options,
      });
    } catch (err) {
      const message = 'Failed to create asset stream';
      if (err instanceof Error) {
        err.message = `${message}: ${err.message}`;
        throw err;
      }
      // Fix: use the `message` variable (previously threw a typo'd literal,
      // 'Failed create asset stream'), matching the sibling catch blocks.
      throw new Error(message, { cause: err });
    }
    if (stream.statusCode !== 200) {
      let errMsg;
      try {
        const err = await tryGetErrorFromStream(stream);
        errMsg = `Referenced asset URL "${url}" returned HTTP ${stream.statusCode}`;
        if (err) {
          errMsg = `${errMsg}: ${err}`;
        }
      } catch (err) {
        const message = 'Failed to parse error response from asset stream';
        if (err instanceof Error) {
          err.message = `${message}: ${err.message}`;
          throw err;
        }
        throw new Error(message, { cause: err });
      }
      const streamError = new Error(errMsg);
      if (stream.statusCode !== undefined) {
        streamError.statusCode = stream.statusCode;
      }
      throw streamError;
    }
    this.maybeCreateAssetDirs();
    debug('Asset stream ready, writing to filesystem at %s', dstPath);
    const tmpPath = joinPath(this.tmpDir, dstPath);
    let sha1 = '';
    let md5 = '';
    let size = 0;
    try {
      const res = await writeHashedStream(tmpPath, stream);
      sha1 = res.sha1;
      md5 = res.md5;
      size = res.size;
    } catch (err) {
      const message = 'Failed to write asset stream to filesystem';
      if (err instanceof Error) {
        err.message = `${message}: ${err.message}`;
        throw err;
      }
      throw new Error(message, { cause: err });
    }
    // Verify it against our downloaded stream to make sure we have the same copy
    const contentLength = stream.headers?.['content-length'];
    const remoteSha1 = stream.headers?.['x-sanity-sha1'];
    const remoteMd5 = stream.headers?.['x-sanity-md5'];
    const hasHash = Boolean(remoteSha1 || remoteMd5);
    // NOTE(review): `sha1` (the local hex digest) is always non-empty here, so
    // `method` is always 'sha1' — harmless, since `differs` below already
    // requires the remote sha1 to be present. Kept as-is.
    const method = sha1 ? 'sha1' : 'md5';
    // Asset validity is primarily determined by the sha1 hash. However, the sha1 hash is computed
    // before certain processes (i.e. svg sanitization) which can result in a different hash.
    // When the sha1 hashes don't match, fallback to using the md5 hash.
    const sha1Differs = remoteSha1 && sha1 !== remoteSha1;
    const md5Differs = remoteMd5 && md5 !== remoteMd5;
    const differs = sha1Differs && md5Differs;
    if (differs) {
      const details = [
        hasHash &&
          (method === 'md5'
            ? `md5 should be ${remoteMd5}, got ${md5}`
            : `sha1 should be ${remoteSha1}, got ${sha1}`),
        contentLength &&
          parseInt(String(contentLength), 10) !== size &&
          `Asset should be ${contentLength} bytes, got ${size}`,
      ];
      const detailsString = `Details:\n - ${details.filter(Boolean).join('\n - ')}`;
      await rm(tmpPath, { recursive: true, force: true });
      throw new Error(`Failed to download asset at ${assetDoc.url}. ${detailsString}`);
    }
    const isImage = assetDoc._type === 'sanity.imageAsset';
    const type = isImage ? 'image' : 'file';
    const id = `${type}-${sha1}`;
    const metaProps = omit(assetDoc, EXCLUDE_PROPS);
    if (Object.keys(metaProps).length > 0) {
      this.assetMap[id] = metaProps;
    }
    // Fix: guard against the URL being absent — splice(-1, 1) would otherwise
    // silently remove the *last* entry of the in-flight list.
    const downloadIndex = this.downloading.indexOf(url);
    if (downloadIndex !== -1) {
      this.downloading.splice(downloadIndex, 1);
    }
    this.filesWritten++;
    return true;
  }

  /**
   * Recursively walks a document, either removing asset references
   * (ACTION_REMOVE) or replacing them with `_sanityAsset` file placeholders
   * (ACTION_REWRITE). Arrays are filtered of removed entries; keys whose value
   * resolves to undefined are dropped.
   */
  findAndModify = (item, action) => {
    if (Array.isArray(item)) {
      const children = item.map((child) => this.findAndModify(child, action));
      return children.filter((child) => child != null);
    }
    if (!item || typeof item !== 'object') {
      return item;
    }
    const record = item;
    const isAsset = isAssetField(record);
    if (isAsset && action === ACTION_REMOVE) {
      return undefined;
    }
    if (isAsset && action === ACTION_REWRITE) {
      const { asset, ...other } = record;
      const assetId = asset._ref;
      const assetType = getAssetType(record);
      const filePath = `${assetType}s/${generateFilename(assetId)}`;
      const modified = this.findAndModify(other, action);
      return {
        _sanityAsset: `${assetType}@file://./${filePath}`,
        ...(typeof modified === 'object' && modified !== null ? modified : {}),
      };
    }
    const newItem = {};
    const keys = Object.keys(record);
    for (const key of keys) {
      const value = record[key];
      newItem[key] = this.findAndModify(value, action);
      if (typeof newItem[key] === 'undefined') {
        // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
        delete newItem[key];
      }
    }
    return newItem;
  };
}

/** True when `item.asset._ref` is a string matching a Sanity asset id. */
function isAssetField(item) {
  const asset = item.asset;
  return Boolean(asset?._ref && typeof asset._ref === 'string' && isSanityAsset(asset._ref));
}

/** Returns 'image' | 'file' from the `asset._ref` prefix, or null. */
function getAssetType(item) {
  const asset = item.asset;
  if (!asset || typeof asset._ref !== 'string') {
    return null;
  }
  const match = asset._ref.match(/^(image|file)-/);
  return match?.[1] ?? null;
}

/** True for canonical asset ids: image-<sha1>-<w>x<h>-<ext> or file-<sha1>-<ext>. */
function isSanityAsset(assetId) {
  return (
    /^image-[a-f0-9]{40}-\d+x\d+-[a-z]+$/.test(assetId) ||
    /^file-[a-f0-9]{40}-[a-z0-9]+$/.test(assetId)
  );
}

/**
 * Derives a local filename from an asset id, e.g.
 * 'image-<hash>-200x300-png' -> '<hash>-200x300.png'. Falls back to
 * '<assetId>.bin' for unrecognized ids.
 */
function generateFilename(assetId) {
  const match = assetId.match(/^(image|file)-(.*?)(-[a-z]+)?$/);
  const asset = match?.[2];
  const ext = match?.[3];
  const extension = (ext ?? 'bin').replace(/^-/, '');
  return asset ? `${asset}.${extension}` : `${assetId}.bin`;
}

/**
 * Pipes `stream` to `filePath` while computing size plus md5/sha1 digests in a
 * single pass.
 * @returns {Promise<{size: number, sha1: string, md5: string}>}
 */
async function writeHashedStream(filePath, stream) {
  let size = 0;
  const md5 = createHash('md5');
  const sha1 = createHash('sha1');
  const hasher = through((chunk, _enc, cb) => {
    size += chunk.length;
    md5.update(chunk);
    sha1.update(chunk);
    cb(null, chunk);
  });
  await pipeline(stream, hasher, createWriteStream(filePath));
  return {
    size,
    sha1: sha1.digest('hex'),
    md5: md5.digest('hex'),
  };
}

/**
 * Drains an error-response stream and extracts a human-readable message:
 * `message`/`error` from a JSON body, the first 16000 chars of a non-JSON
 * body, or null for an empty body.
 */
function tryGetErrorFromStream(stream) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let receivedData = false;
    stream.on('data', (chunk) => {
      receivedData = true;
      chunks.push(chunk);
    });
    stream.on('end', () => {
      if (!receivedData) {
        resolve(null);
        return;
      }
      const body = Buffer.concat(chunks);
      try {
        const parsed = JSON.parse(body.toString('utf8'));
        resolve(parsed.message ?? parsed.error ?? null);
      } catch {
        resolve(body.toString('utf8').slice(0, 16000));
      }
    });
    stream.on('error', reject);
  });
}

/** Shallow-copies `obj` without the listed keys. */
function omit(obj, keys) {
  const copy = {};
  for (const [key, value] of Object.entries(obj)) {
    if (!keys.includes(key)) {
      copy[key] = value;
    }
  }
  return copy;
}
//# sourceMappingURL=AssetHandler.js.map