/**
 * @sanity/export — Export Sanity documents and assets.
 */
import { createHash } from 'node:crypto';
import { createWriteStream, mkdirSync } from 'node:fs';
import { join as joinPath } from 'node:path';
import { pipeline } from 'node:stream/promises';
import PQueue from 'p-queue';
import { delay } from './util/delay.js';
import { through, throughObj } from './util/streamHelpers.js';
import { ASSET_DOWNLOAD_CONCURRENCY, ASSET_DOWNLOAD_MAX_RETRIES, DEFAULT_RETRY_DELAY, } from './constants.js';
import { debug } from './debug.js';
import { getUserAgent } from './getUserAgent.js';
import { requestStream } from './requestStream.js';
import { rm } from 'node:fs/promises';
// Asset-document properties that describe the binary itself rather than user
// metadata; everything NOT listed here is preserved in the asset metadata map
// (see downloadAsset, which stores the remaining props keyed by asset ID).
const EXCLUDE_PROPS = ['_id', '_type', 'assetId', 'extension', 'mimeType', 'path', 'url'];
// Modes for AssetHandler.findAndModify(): drop asset references entirely,
// or rewrite them into local `_sanityAsset` file placeholders.
const ACTION_REMOVE = 'remove';
const ACTION_REWRITE = 'rewrite';
/**
 * Handles assets (images/files) during an export: queues and downloads asset
 * binaries to a temporary directory (with retries and hash verification), and
 * transforms documents depending on export mode — rewriting asset references
 * to local `_sanityAsset` placeholders, stripping them, or passing documents
 * through untouched.
 */
export class AssetHandler {
  client;
  tmpDir;
  // Lazily-created flag for the `files`/`images` output directories
  assetDirsCreated;
  // Asset URLs currently queued or in-flight
  downloading;
  // Asset document ID => 'image' | 'file'
  assetsSeen;
  // `<type>-<sha1>` => metadata props (everything outside EXCLUDE_PROPS)
  assetMap;
  filesWritten;
  queueSize;
  maxRetries;
  retryDelayMs;
  queue;
  // First fatal download error, surfaced from finish()
  rejectedError;
  reject;

  /**
   * @param options.client - Sanity client (used for its auth token)
   * @param options.tmpDir - Directory assets are written into
   * @param options.concurrency - Max parallel downloads (optional)
   * @param options.maxRetries - Attempts per asset before giving up (optional)
   * @param options.retryDelayMs - Delay between attempts (optional)
   * @param options.queue - Preconstructed PQueue (optional, mainly for tests)
   */
  constructor(options) {
    const concurrency = options.concurrency ?? ASSET_DOWNLOAD_CONCURRENCY;
    debug('Using asset download concurrency of %d', concurrency);
    this.client = options.client;
    this.tmpDir = options.tmpDir;
    this.assetDirsCreated = false;
    this.downloading = [];
    this.assetsSeen = new Map();
    this.assetMap = {};
    this.filesWritten = 0;
    this.queueSize = 0;
    this.maxRetries = options.maxRetries ?? ASSET_DOWNLOAD_MAX_RETRIES;
    this.retryDelayMs = options.retryDelayMs;
    this.queue = options.queue ?? new PQueue({ concurrency });
    this.rejectedError = null;
    // Until finish() installs a real rejecter there is nobody to notify, so
    // stash the error and let finish() report it immediately.
    this.reject = (err) => {
      this.rejectedError = err;
    };
  }

  /** Resets seen-asset bookkeeping and cancels any still-queued downloads. */
  clear() {
    this.assetsSeen.clear();
    this.queue.clear();
    this.queueSize = 0;
  }

  /**
   * Resolves with the asset metadata map once all queued downloads settle;
   * rejects with the first fatal download error, if one already occurred or
   * occurs later.
   */
  finish() {
    return new Promise((resolve, reject) => {
      if (this.rejectedError) {
        reject(this.rejectedError);
        return;
      }
      this.reject = reject;
      void this.queue.onIdle().then(() => resolve(this.assetMap));
    });
  }

  // Called when we want to download all assets to local filesystem and rewrite
  // documents to hold placeholder asset references
  // (_sanityAsset: 'image@file:///local/path')
  rewriteAssets = throughObj((doc, _enc, callback) => {
    if (['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type)) {
      const assetDoc = doc;
      const type = doc._type === 'sanity.imageAsset' ? 'image' : 'file';
      const filePath = `${type}s/${generateFilename(doc._id)}`;
      this.assetsSeen.set(doc._id, type);
      this.queueAssetDownload(assetDoc, filePath);
      callback();
      return;
    }
    callback(null, this.findAndModify(doc, ACTION_REWRITE));
  });

  // Called in the case where we don't _want_ assets, so basically just remove
  // all asset documents as well as references to assets
  // (*.asset._ref ^= (image|file)-)
  stripAssets = throughObj((doc, _enc, callback) => {
    if (['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type)) {
      callback();
      return;
    }
    callback(null, this.findAndModify(doc, ACTION_REMOVE));
  });

  // Called when we are using raw export mode along with `assets: false`, where
  // we simply want to skip asset documents but retain asset references
  // (useful for data mangling)
  skipAssets = throughObj((doc, _enc, callback) => {
    const isAsset = ['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type);
    if (isAsset) {
      callback();
      return;
    }
    callback(null, doc);
  });

  // Pass-through transform for modes that need no asset handling at all
  noop = throughObj((doc, _enc, callback) => callback(null, doc));

  /**
   * Queues a download of the given asset document to `dstPath` (relative to
   * tmpDir), retrying up to `maxRetries` times. 401/403/404 responses are
   * logged and ignored; other 4xx errors are not retried.
   */
  queueAssetDownload(assetDoc, dstPath) {
    if (!assetDoc.url) {
      debug('Asset document "%s" does not have a URL property, skipping', assetDoc._id);
      return;
    }
    debug('Adding download task for %s (destination: %s)', assetDoc._id, dstPath);
    this.queueSize++;
    this.downloading.push(assetDoc.url);
    const doDownload = async () => {
      let dlError;
      for (let attempt = 0; attempt < this.maxRetries; attempt++) {
        try {
          return await this.downloadAsset(assetDoc, dstPath);
        }
        catch (err) {
          const downloadError = err;
          // Ignore inaccessible assets
          switch (downloadError.statusCode) {
            case 401:
            case 403:
            case 404:
              console.warn(`⚠ Asset failed with HTTP %d (ignoring): %s`, downloadError.statusCode, assetDoc._id);
              return true;
            default:
          }
          debug(`Error downloading asset %s (destination: %s), attempt %d`, assetDoc._id, dstPath, attempt, err);
          dlError = downloadError;
          if (downloadError.statusCode &&
            downloadError.statusCode >= 400 &&
            downloadError.statusCode < 500) {
            // Don't retry on client errors
            break;
          }
          await delay(this.retryDelayMs ?? DEFAULT_RETRY_DELAY);
        }
      }
      throw new Error(dlError?.message ?? 'Unknown error downloading asset');
    };
    this.queue
      .add(() => doDownload().catch((err) => {
        // A download that exhausted its retries aborts the whole export
        debug('Failed to download the asset, aborting download', err);
        this.queue.clear();
        this.reject(err instanceof Error ? err : new Error(String(err)));
      }))
      .catch((error) => {
        debug('Queued task failed', error);
      });
  }

  /** Creates the `files`/`images` output directories once, on first use. */
  maybeCreateAssetDirs() {
    if (this.assetDirsCreated) {
      return;
    }
    mkdirSync(joinPath(this.tmpDir, 'files'), { recursive: true });
    mkdirSync(joinPath(this.tmpDir, 'images'), { recursive: true });
    this.assetDirsCreated = true;
  }

  /**
   * Builds the URL and headers for fetching an asset. Images served from a
   * recognized Sanity CDN host get an Authorization header and `dlRaw=true`
   * (to fetch the original, unprocessed binary) when a token is configured.
   */
  getAssetRequestOptions(assetDoc) {
    const token = this.client.config().token;
    const headers = { 'User-Agent': getUserAgent() };
    const isImage = assetDoc._type === 'sanity.imageAsset';
    const url = URL.parse(assetDoc.url ?? '');
    // If we can't parse it, return as-is
    if (!url) {
      return { url: assetDoc.url ?? '', headers };
    }
    if (isImage &&
      token &&
      (url.hostname === 'cdn.sanity.io' ||
        url.hostname === 'cdn.sanity.work' ||
        // used in tests. use a very specific port to avoid conflicts
        url.host === 'localhost:43216')) {
      headers.Authorization = `Bearer ${token}`;
      url.searchParams.set('dlRaw', 'true');
    }
    return { url: url.toString(), headers };
  }

  /**
   * Downloads a single asset to `dstPath`, verifying size and hash headers
   * against the received bytes, and records its metadata in `assetMap`.
   * Throws (with `statusCode` set when available) on any failure.
   */
  async downloadAsset(assetDoc, dstPath) {
    const { url } = assetDoc;
    debug('Downloading asset %s', url);
    const options = this.getAssetRequestOptions(assetDoc);
    let stream;
    try {
      stream = await requestStream({
        maxRetries: 0, // We handle retries ourselves in queueAssetDownload
        ...options,
      });
    }
    catch (err) {
      const message = 'Failed to create asset stream';
      if (err instanceof Error) {
        err.message = `${message}: ${err.message}`;
        throw err;
      }
      // Fix: previously threw a hardcoded, misspelled message instead of
      // reusing `message`
      throw new Error(message, { cause: err });
    }
    if (stream.statusCode !== 200) {
      let errMsg;
      try {
        const err = await tryGetErrorFromStream(stream);
        errMsg = `Referenced asset URL "${url}" returned HTTP ${stream.statusCode}`;
        if (err) {
          errMsg = `${errMsg}: ${err}`;
        }
      }
      catch (err) {
        const message = 'Failed to parse error response from asset stream';
        if (err instanceof Error) {
          err.message = `${message}: ${err.message}`;
          throw err;
        }
        throw new Error(message, { cause: err });
      }
      const streamError = new Error(errMsg);
      if (stream.statusCode !== undefined) {
        streamError.statusCode = stream.statusCode;
      }
      throw streamError;
    }
    this.maybeCreateAssetDirs();
    debug('Asset stream ready, writing to filesystem at %s', dstPath);
    const tmpPath = joinPath(this.tmpDir, dstPath);
    let sha1 = '';
    let md5 = '';
    let size = 0;
    try {
      const res = await writeHashedStream(tmpPath, stream);
      sha1 = res.sha1;
      md5 = res.md5;
      size = res.size;
    }
    catch (err) {
      const message = 'Failed to write asset stream to filesystem';
      if (err instanceof Error) {
        err.message = `${message}: ${err.message}`;
        throw err;
      }
      throw new Error(message, { cause: err });
    }
    // Verify it against our downloaded stream to make sure we have the same copy
    const contentLength = stream.headers?.['content-length'];
    const remoteSha1 = stream.headers?.['x-sanity-sha1'];
    const remoteMd5 = stream.headers?.['x-sanity-md5'];
    const hasHash = Boolean(remoteSha1 || remoteMd5);
    // Fix: report the method for which a remote hash exists; the local `sha1`
    // digest is always non-empty, so checking it made the md5 branch dead code
    const method = remoteSha1 ? 'sha1' : 'md5';
    // Asset validity is primarily determined by the sha1 hash. However, the
    // sha1 hash is computed before certain processes (i.e. svg sanitization)
    // which can result in a different hash. When the sha1 hashes don't match,
    // fallback to using the md5 hash.
    const sha1Differs = remoteSha1 && sha1 !== remoteSha1;
    const md5Differs = remoteMd5 && md5 !== remoteMd5;
    const differs = sha1Differs && md5Differs;
    if (differs) {
      const details = [
        hasHash &&
          (method === 'md5'
            ? `md5 should be ${remoteMd5}, got ${md5}`
            : `sha1 should be ${remoteSha1}, got ${sha1}`),
        contentLength &&
          parseInt(String(contentLength), 10) !== size &&
          `Asset should be ${contentLength} bytes, got ${size}`,
      ];
      const detailsString = `Details:\n - ${details.filter(Boolean).join('\n - ')}`;
      await rm(tmpPath, { recursive: true, force: true });
      throw new Error(`Failed to download asset at ${assetDoc.url}. ${detailsString}`);
    }
    const isImage = assetDoc._type === 'sanity.imageAsset';
    const type = isImage ? 'image' : 'file';
    const id = `${type}-${sha1}`;
    const metaProps = omit(assetDoc, EXCLUDE_PROPS);
    if (Object.keys(metaProps).length > 0) {
      this.assetMap[id] = metaProps;
    }
    // Fix: guard against a missing URL — `splice(-1, 1)` would otherwise
    // silently remove the wrong (last) in-flight entry
    const urlIndex = this.downloading.indexOf(url);
    if (urlIndex !== -1) {
      this.downloading.splice(urlIndex, 1);
    }
    this.filesWritten++;
    return true;
  }

  /**
   * Recursively walks `item`, either removing asset references
   * (ACTION_REMOVE) or replacing them with `_sanityAsset` file placeholders
   * (ACTION_REWRITE). Returns the transformed value; `undefined` results are
   * dropped from objects and arrays.
   */
  findAndModify = (item, action) => {
    if (Array.isArray(item)) {
      const children = item.map((child) => this.findAndModify(child, action));
      return children.filter((child) => child != null);
    }
    if (!item || typeof item !== 'object') {
      return item;
    }
    const record = item;
    const isAsset = isAssetField(record);
    if (isAsset && action === ACTION_REMOVE) {
      return undefined;
    }
    if (isAsset && action === ACTION_REWRITE) {
      const { asset, ...other } = record;
      const assetId = asset._ref;
      const assetType = getAssetType(record);
      const filePath = `${assetType}s/${generateFilename(assetId)}`;
      const modified = this.findAndModify(other, action);
      return {
        _sanityAsset: `${assetType}@file://./${filePath}`,
        ...(typeof modified === 'object' && modified !== null ? modified : {}),
      };
    }
    const newItem = {};
    const keys = Object.keys(record);
    for (const key of keys) {
      const value = record[key];
      newItem[key] = this.findAndModify(value, action);
      if (typeof newItem[key] === 'undefined') {
        // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
        delete newItem[key];
      }
    }
    return newItem;
  };
}
/**
 * True when `item` carries an `asset._ref` string that points at a Sanity
 * image or file asset document.
 */
function isAssetField(item) {
  const ref = item.asset?._ref;
  return typeof ref === 'string' && isSanityAsset(ref);
}

/**
 * Extracts the asset type ('image' or 'file') from an item's `asset._ref`,
 * or null when the reference is missing or not an asset reference.
 */
function getAssetType(item) {
  const ref = item.asset?._ref;
  if (typeof ref !== 'string') {
    return null;
  }
  return /^(image|file)-/.exec(ref)?.[1] ?? null;
}

/**
 * Checks whether an ID follows the Sanity asset document naming scheme:
 * `image-<sha1>-<width>x<height>-<format>` or `file-<sha1>-<extension>`.
 */
function isSanityAsset(assetId) {
  const imagePattern = /^image-[a-f0-9]{40}-\d+x\d+-[a-z]+$/;
  const filePattern = /^file-[a-f0-9]{40}-[a-z0-9]+$/;
  return imagePattern.test(assetId) || filePattern.test(assetId);
}
/**
 * Derives an on-disk filename (`<hash[-dims]>.<ext>`) from a Sanity asset
 * document ID such as `image-<sha1>-<width>x<height>-png` or
 * `file-<sha1>-pdf`. IDs that do not match the expected pattern fall back to
 * `<assetId>.bin`.
 */
function generateFilename(assetId) {
  const parts = /^(image|file)-(.*?)(-[a-z]+)?$/.exec(assetId);
  const base = parts?.[2];
  if (!base) {
    return `${assetId}.bin`;
  }
  // parts[3] is e.g. "-png"; strip the dash, defaulting to "bin"
  const extension = parts[3] ? parts[3].slice(1) : 'bin';
  return `${base}.${extension}`;
}
/**
 * Streams `stream` to `filePath` while computing its byte size plus MD5 and
 * SHA-1 digests in a single pass.
 *
 * @returns {{size: number, sha1: string, md5: string}} hex digests and size
 */
async function writeHashedStream(filePath, stream) {
  const md5Hash = createHash('md5');
  const sha1Hash = createHash('sha1');
  let byteCount = 0;
  // Tap each chunk for hashing/size before passing it along unchanged
  const tap = through((chunk, _enc, cb) => {
    byteCount += chunk.length;
    md5Hash.update(chunk);
    sha1Hash.update(chunk);
    cb(null, chunk);
  });
  await pipeline(stream, tap, createWriteStream(filePath));
  return {
    size: byteCount,
    sha1: sha1Hash.digest('hex'),
    md5: md5Hash.digest('hex'),
  };
}
/**
 * Attempts to extract an error message from an HTTP response stream.
 * Resolves with the `message`/`error` property of a JSON body, the raw body
 * text (capped at 16000 chars) when it is not valid JSON, or null when the
 * stream produced no data. Rejects on stream errors.
 */
function tryGetErrorFromStream(stream) {
  return new Promise((resolve, reject) => {
    const buffers = [];
    stream.on('data', (chunk) => buffers.push(chunk));
    stream.on('error', reject);
    stream.on('end', () => {
      if (buffers.length === 0) {
        resolve(null);
        return;
      }
      const text = Buffer.concat(buffers).toString('utf8');
      try {
        const payload = JSON.parse(text);
        resolve(payload.message ?? payload.error ?? null);
      }
      catch {
        resolve(text.slice(0, 16000));
      }
    });
  });
}
/**
 * Returns a shallow copy of `obj` without the own enumerable properties
 * whose names appear in `keys`.
 */
function omit(obj, keys) {
  const entries = Object.entries(obj).filter(([key]) => !keys.includes(key));
  return Object.fromEntries(entries);
}
//# sourceMappingURL=AssetHandler.js.map