@sanity/import
Version:
Import documents to a Sanity dataset
304 lines (303 loc) • 11.7 kB
JavaScript
import { basename } from 'node:path';
import { isSanityImageUrl } from '@sanity/asset-utils';
import debug from 'debug';
import pMap from 'p-map';
import { getHashedBufferForUri } from './util/getHashedBufferForUri.js';
import { progressStepper } from './util/progressStepper.js';
import { retryOnFailure } from './util/retryOnFailure.js';
import { suffixTag } from './util/suffixTag.js';
import { urlExists } from './util/urlExists.js';
// Debug logger for this module, created under the `sanity:import` namespace.
const logger = debug('sanity:import');
// Default number of assets handled in parallel by `uploadAssets` when
// `options.assetConcurrency` is not set.
const ASSET_UPLOAD_CONCURRENCY = 8;
// Number of reference-patch batches committed in parallel by `setAssetReferences`.
const ASSET_PATCH_CONCURRENCY = 30;
// A batch is closed once it holds this many documents.
const ASSET_PATCH_BATCH_SIZE = 50;
// A batch is closed when adding another document would push the number of asset
// reference tasks in it (summed over all its documents) above this value.
const ASSET_PATCH_BATCH_TASK_SIZE = 1000;
/**
 * Ensures every asset exists in the dataset and points the referencing
 * documents at the resulting asset documents.
 *
 * @param {Array<{documentId: string, path: string, url: string, type: string}>} assets
 *   Asset references found in the documents being imported.
 * @param {object} options - Import options. Read here: `assetConcurrency`,
 *   `unreferencedAssets`, `onProgress` and `allowFailingAssets`; the whole object
 *   is also handed on to the per-asset and per-batch helpers.
 * @returns {Promise<{batches: number, failures: Array<object>}>} `failures` lists
 *   the assets that could not be ensured. `batches` is the sum of the numbers
 *   resolved by `setAssetReferences`, one per committed batch.
 */
export async function uploadAssets(assets, options) {
  const concurrency = options.assetConcurrency || ASSET_UPLOAD_CONCURRENCY;
  logger('Uploading assets with a concurrency of %d', concurrency);

  // Map of `type#url` -> list of {documentId, path} locations that must end up
  // referencing that asset.
  const assetRefMap = getAssetRefMap(assets);

  // Assets that no document references (for instance ones that were simply part
  // of an exported dataset) are still ensured, with nothing to patch afterwards.
  // NOTE(review): entries are used directly as map keys, so they are presumably
  // `type#url` strings - confirm against the caller.
  for (const assetKey of options.unreferencedAssets || []) {
    if (assetRefMap.has(assetKey)) {
      continue;
    }
    assetRefMap.set(assetKey, []);
  }

  if (assetRefMap.size === 0) {
    return { batches: 0, failures: [] };
  }

  // Called once for every asset that has been dealt with, to report progress.
  const progress = progressStepper(options.onProgress, {
    step: 'Importing assets (files/images)',
    total: assetRefMap.size
  });

  const ensureAssetExists = (assetKey, i) => ensureAssetWithRetries(options, progress, assetKey, i);

  // When failures are tolerated, a rejected asset resolves to its error object
  // instead, so no reference is ever set for it and the remaining work goes on.
  let ensureMethod = ensureAssetExists;
  if (options.allowFailingAssets) {
    ensureMethod = (assetKey, i) => ensureAssetExists(assetKey, i).catch((err) => err);
  }

  // One result per unique asset key, in map order: an asset document id on
  // success, or an error object for a tolerated failure.
  const assetIds = await pMap(assetRefMap.keys(), ensureMethod, { concurrency });

  // Collect the failures so they can be reported and possibly retried later.
  const failures = getUploadFailures(assetRefMap, assetIds);

  // Patch every document that needs an asset reference.
  const perBatchCounts = await setAssetReferences(assetRefMap, assetIds, options);
  const batches = perBatchCounts.reduce((sum, count) => sum + count, 0);

  return { batches, failures };
}
/**
 * Indexes asset references by asset.
 *
 * @param {Iterable<{documentId: string, path: string, type: string, url: string}>} assets
 *   Asset references, one per place an asset is used.
 * @returns {Map<string, Array<{documentId: string, path: string}>>} Map keyed by
 *   `type#url`, holding every document location that uses that asset, in the
 *   order the references were encountered.
 */
function getAssetRefMap(assets) {
  const refsByAsset = new Map();
  for (const { documentId, path, type, url } of assets) {
    const assetKey = `${type}#${url}`;
    if (!refsByAsset.has(assetKey)) {
      refsByAsset.set(assetKey, []);
    }
    refsByAsset.get(assetKey).push({ documentId, path });
  }
  return refsByAsset;
}
/**
 * Downloads one asset and makes sure it exists in the dataset, running both
 * steps through `retryOnFailure`, and ticks `progress` once the asset has
 * either been ensured or has failed.
 *
 * @param {object} options - Import options, handed on to `ensureAsset`.
 * @param {Function} progress - Called without arguments to report one finished asset.
 * @param {string} assetKey - Asset key of the form `type#url`.
 * @param {number} i - Asset index, used in debug log output.
 * @returns {Promise<string>} Id of the asset document.
 * @throws The underlying error, with `type` and `url` added and its message
 *   prefixed with a description of the failed step unless it already mentions the URL.
 */
async function ensureAssetWithRetries(options, progress, assetKey, i) {
  // With a limit of 2, anything following a second `#` in the key (such as a
  // URL fragment) does not end up in `url`.
  const [type, url] = assetKey.split('#', 2);

  // Builds a rejection handler: tick progress, tag the error with the asset it
  // belongs to, describe the failed step in the message, then rethrow.
  const rethrowTagged = (describeFailure) => (err) => {
    progress();
    const assetError = err;
    assetError.type = type;
    assetError.url = url;
    const alreadyMentionsUrl = assetError.message.includes(url);
    assetError.message = alreadyMentionsUrl
      ? assetError.message
      : describeFailure(assetError.message);
    throw assetError;
  };

  const downloaded = await retryOnFailure(() => downloadAsset(url, i)).catch(
    rethrowTagged((reason) => `Failed to download ${type} @ ${url}:\n${reason}`)
  );
  const { buffer, sha1hash } = downloaded;
  const asset = { buffer, sha1hash, type, url };

  const ensured = retryOnFailure(() => ensureAsset(asset, options, i)).then((assetDocId) => {
    progress();
    return assetDocId;
  });
  return ensured.catch(
    rethrowTagged((reason) => `Failed to upload ${type} @ ${url}:\n${reason}`)
  );
}
/**
 * Fetches an asset so that it can be identified by hash before uploading.
 *
 * @param {string} url - Location of the asset.
 * @param {number} i - Asset index, used in debug log output.
 * @returns Whatever `getHashedBufferForUri` returns; the caller awaits it and
 *   reads `buffer` and `sha1hash` from the result.
 */
function downloadAsset(url, i) {
  logger('[Asset #%d] Downloading %s', i, url);
  // The download is what lets us compute the hash used to detect existing assets.
  const hashedBuffer = getHashedBufferForUri(url);
  return hashedBuffer;
}
/**
 * Makes sure a downloaded asset exists in the dataset, uploading it if needed.
 *
 * Unless `options.replaceAssets` is set, an asset document that already has the
 * same SHA-1 hash is reused instead of uploading the content again.
 *
 * @param {{buffer: *, sha1hash: string, type: string, url: string}} asset
 *   The downloaded asset; `buffer` is passed to the client's upload as-is.
 * @param {object} options - Import options. Read here: `assetMap` (metadata keyed
 *   by `${type}-${sha1hash}`, defaults to `{}`), `client`, `replaceAssets`, `tag`.
 * @param {number} i - Asset index, used in debug log output.
 * @returns {Promise<string>} `_id` of the reused or newly uploaded asset document.
 */
async function ensureAsset(asset, options, i) {
  const { buffer, sha1hash, type, url } = asset;
  const { assetMap = {}, client, replaceAssets, tag } = options;

  // See if the item exists on the server
  if (!replaceAssets) {
    logger('[Asset #%d] Checking for asset with hash %s', i, sha1hash);
    const assetDocId = await getAssetDocumentIdForHash(client, type, sha1hash, 0, suffixTag(tag, 'asset.get-id'));
    if (assetDocId) {
      // Same hash means we want to reuse the asset
      logger('[Asset #%d] Found %s for hash %s', i, type, sha1hash);
      return assetDocId;
    }
  }

  const assetMeta = assetMap[`${type}-${sha1hash}`];
  const hasFilename = assetMeta && assetMeta.originalFilename;
  // Count the fields of THIS asset's metadata. Counting the entries of the whole
  // `assetMap` would make the decision depend on how many assets are being
  // imported rather than on whether this asset has anything beyond a filename.
  const hasNonFilenameMeta = Boolean(assetMeta) && Object.keys(assetMeta).length > 1;

  // Fall back to the last path segment of the URL when no original filename is known.
  const { pathname } = new URL(url);
  const filename = hasFilename ? assetMeta.originalFilename : basename(pathname || '');

  // If it doesn't exist, we want to upload it
  logger('[Asset #%d] Uploading %s with URL %s', i, type, url);
  const uploadOptions = {
    tag: suffixTag(tag, 'asset.upload')
  };
  if (filename) {
    uploadOptions.filename = filename;
  }
  const assetDoc = await client.assets.upload(type, buffer, uploadOptions);

  // If we have more metadata to provide, update the asset document
  if (hasNonFilenameMeta) {
    await client.patch(assetDoc._id).set(assetMeta).commit({
      tag: suffixTag(tag, 'asset.add-meta'),
      visibility: 'async'
    });
  }
  return assetDoc._id;
}
/**
 * Looks up an existing asset document by content hash and verifies that the
 * file it points to is still reachable.
 *
 * Any error thrown by the lookup or the reachability check triggers a retry;
 * the function gives up once `attemptNum` has reached 3.
 *
 * @param {object} client - Client exposing `fetch(query, params, options)`.
 * @param {string} type - `'file'` selects `sanity.fileAsset`; anything else
 *   selects `sanity.imageAsset`.
 * @param {string} sha1hash - Hash to look for.
 * @param {number} attemptNum - Retry counter; callers start at 0.
 * @param {string} tag - Request tag passed to the query.
 * @returns {Promise<string|null>} The asset document id, or `null` when there is
 *   no usable match (no document, no `url` on it, or the URL does not exist).
 * @throws {Error} After the final failed attempt. The original error is
 *   available as `cause`, and `attempts` holds the retry counter at that point.
 */
async function getAssetDocumentIdForHash(client, type, sha1hash, attemptNum, tag) {
  // @todo remove retry logic when client has reintroduced it
  try {
    const dataType = type === 'file' ? 'sanity.fileAsset' : 'sanity.imageAsset';
    const query = '*[_type == $dataType && sha1hash == $sha1hash][0]{_id, url}';
    const assetDoc = await client.fetch(query, {
      dataType,
      sha1hash
    }, {
      tag
    });
    if (!assetDoc || !assetDoc.url) {
      return null;
    }
    // By adding `fm=json` to image requests, we do a slightly cheaper operation
    const assetUrl = isSanityImageUrl(assetDoc.url) ? `${assetDoc.url}?fm=json` : assetDoc.url;
    const exists = await urlExists(assetUrl);
    if (!exists) {
      // Returning null makes the caller treat the asset as missing.
      logger(`Asset document ${assetDoc._id} exists, but file does not. Overwriting.`);
      return null;
    }
    return assetDoc._id;
  } catch (err) {
    if (attemptNum < 3) {
      return getAssetDocumentIdForHash(client, type, sha1hash, attemptNum + 1, tag);
    }
    // Put the diagnostics on the error that is actually thrown; setting them on
    // the caught error and then throwing a different one would lose them.
    const reason = err?.message ?? String(err);
    const queryError = new Error(`Error while attempt to query Sanity API:\n${reason}`, {
      cause: err
    });
    queryError.attempts = attemptNum;
    throw queryError;
  }
}
/**
 * Lists the assets that could not be ensured, together with the documents that
 * reference them.
 *
 * @param {Map<string, Array<{documentId: string, path: string}>>} assetRefMap
 *   Document references per asset key.
 * @param {Iterable<string|object>} assetIds - One result per entry of
 *   `assetRefMap`, in the same order: a string asset id on success, otherwise
 *   an error object carrying a `url`.
 * @returns {Array<{documents: Array<{documentId: string, path: string}>, type: string, url: *}>}
 *   One record per non-string result.
 */
function getUploadFailures(assetRefMap, assetIds) {
  // Results are matched to map entries purely by position.
  const refLists = Array.from(assetRefMap.values());
  const failures = [];
  let position = 0;
  for (const outcome of assetIds) {
    const documents = refLists[position];
    position += 1;
    if (typeof outcome === 'string') {
      // A string is an asset document id, i.e. a success.
      continue;
    }
    const affected = documents
      ? documents.map(({ documentId, path }) => ({ documentId, path }))
      : [];
    failures.push({
      documents: affected,
      type: 'asset',
      url: outcome.url
    });
  }
  return failures;
}
/**
 * Collects, per document, the asset references that have to be written.
 *
 * @param {Map<string, Array<{documentId: string, path: string}>>} assetRefMap
 *   Document references per asset key.
 * @param {Iterable<string|object>} assetIds - One result per map entry, in the
 *   same order; only string results (asset document ids) produce tasks.
 * @returns {Array<{documentId: string, tasks: Array<{assetId: string, path: string}>}>}
 */
function collectPatchTasksPerDocument(assetRefMap, assetIds) {
  const lookup = assetRefMap.values();
  // Grouping by document avoids patching the same document more than once.
  // The accumulator has no prototype, so a document id such as `constructor`
  // or `toString` cannot collide with a member inherited from Object.prototype.
  const patchTasksPerDoc = Object.create(null);
  for (const assetId of assetIds) {
    const documents = lookup.next().value;
    // Non-string results are failed assets; they get no reference.
    if (typeof assetId !== 'string' || !documents) {
      continue;
    }
    for (const { documentId, path } of documents) {
      patchTasksPerDoc[documentId] = patchTasksPerDoc[documentId] || [];
      patchTasksPerDoc[documentId].push({ assetId, path });
    }
  }
  return Object.entries(patchTasksPerDoc).map(([documentId, tasks]) => ({ documentId, tasks }));
}

/**
 * Packs per-document patch tasks into batches, starting a new batch when the
 * current one holds `ASSET_PATCH_BATCH_SIZE` documents or when adding the next
 * document would exceed `ASSET_PATCH_BATCH_TASK_SIZE` tasks.
 *
 * @param {Array<{documentId: string, tasks: Array<object>}>} patchTasks
 * @returns {Array<Array<{documentId: string, tasks: Array<object>}>>}
 */
function groupPatchTasksIntoBatches(patchTasks) {
  const batches = [];
  let currentBatch = null;
  // Running task total of `currentBatch`, kept up to date instead of re-summed.
  let currentTaskCount = 0;
  for (const task of patchTasks) {
    const needsNewBatch =
      currentBatch === null ||
      currentTaskCount + task.tasks.length > ASSET_PATCH_BATCH_TASK_SIZE ||
      currentBatch.length >= ASSET_PATCH_BATCH_SIZE;
    if (needsNewBatch) {
      currentBatch = [task];
      currentTaskCount = task.tasks.length;
      batches.push(currentBatch);
    } else {
      currentBatch.push(task);
      currentTaskCount += task.tasks.length;
    }
  }
  return batches;
}

/**
 * Writes asset references into every document that uses a successfully ensured
 * asset, committing the patches in batches instead of one mutation per document.
 *
 * @param {Map<string, Array<{documentId: string, path: string}>>} assetRefMap
 *   Document references per asset key.
 * @param {Iterable<string|object>} assetIds - One result per map entry, in the
 *   same order: a string asset id on success, anything else for a failure.
 * @param {object} options - Import options. Read here: `client`, `tag`, `onProgress`.
 * @returns {Promise<number[]>} One number per committed batch (the number of
 *   reference tasks in it), or `[0]` when there is nothing to patch.
 */
function setAssetReferences(assetRefMap, assetIds, options) {
  const { client, tag } = options;

  // Array of {documentId: string, tasks: [{path, assetId}]}, grouped into batches.
  const patchTasks = collectPatchTasksPerDocument(assetRefMap, assetIds);
  const batches = groupPatchTasksIntoBatches(patchTasks);
  if (batches.length === 0) {
    return Promise.resolve([0]);
  }

  // Setting references is reported as its own progress step, counted in batches.
  const progress = progressStepper(options.onProgress, {
    step: 'Setting asset references to documents',
    total: batches.length
  });

  // Now perform the batch operations in parallel with a given concurrency
  const mapOptions = {
    concurrency: ASSET_PATCH_CONCURRENCY
  };
  const setAssetRefs = setAssetReferenceBatch.bind(null, client, progress, tag);
  return pMap(batches, setAssetRefs, mapOptions);
}
/**
 * Commits the reference patches of one batch as a single transaction, run
 * through `retryOnFailure`.
 *
 * @param {object} client - Client exposing `transaction()`.
 * @param {Function} progress - Progress callback, invoked after a successful commit.
 * @param {string} tag - Base request tag; suffixed with `asset.set-refs`.
 * @param {Array<{documentId: string, tasks: Array<{assetId: string, path: string}>}>} batch
 *   Documents to patch together.
 * @returns The result of `retryOnFailure`; the wrapped operation resolves to the
 *   number of reference tasks in the batch.
 */
function setAssetReferenceBatch(client, progress, tag, batch) {
  logger('Setting asset references on %d documents', batch.length);

  const countTasks = () => batch.reduce((sum, { tasks }) => sum + tasks.length, 0);

  // The transaction is rebuilt from scratch each time the operation is invoked.
  const commitBatch = () => {
    const trx = batch.reduce(
      (pending, documentTasks) => reducePatch(pending, documentTasks),
      client.transaction()
    );
    const committed = trx.commit({
      tag: suffixTag(tag, 'asset.set-refs'),
      visibility: 'async'
    });
    // NOTE(review): `progress` receives the commit result as its argument here,
    // whereas elsewhere it is called with none - confirm that this is harmless.
    return committed.then(progress).then(countTasks);
  };

  return retryOnFailure(commitBatch);
}
/**
 * Extracts the part of an asset document id that precedes its first dash,
 * e.g. `image` for `image-abc-100x100-png`.
 *
 * @param {string} assetId - Asset document id.
 * @returns {string} The leading segment of the id.
 */
function getAssetType(assetId) {
  const firstDash = assetId.indexOf('-');
  // NOTE: for an id without any dash, `firstDash` is -1 and the slice drops the
  // last character instead.
  return assetId.slice(0, firstDash);
}
/**
 * Adds to a transaction the patch that sets all asset references of one document.
 *
 * For every task the field at `path` is created if missing, with `_type` taken
 * from the asset id, and `<path>.asset` is then set to a reference to the asset.
 *
 * @param {object} trx - Transaction exposing `patch(documentId, builder)`.
 * @param {{documentId: string, tasks: Array<{assetId: string, path: string}>}} documentTasks
 *   The document and the references to set on it.
 * @returns Whatever `trx.patch` returns.
 */
function reducePatch(trx, documentTasks) {
  const { documentId, tasks } = documentTasks;
  return trx.patch(documentId, (patch) => {
    for (const { assetId, path } of tasks) {
      const placeholder = { [path]: { _type: getAssetType(assetId) } };
      const reference = { [`${path}.asset`]: { _ref: assetId, _type: 'reference' } };
      // The chained result is discarded and `patch` itself is returned, so this
      // relies on the patch builder accumulating operations in place.
      patch.setIfMissing(placeholder).set(reference);
    }
    return patch;
  });
}
//# sourceMappingURL=uploadAssets.js.map