/*
 * @strapi/data-transfer
 * Version: (not captured)
 * Data transfer capabilities for Strapi
 * Listing metadata: 537 lines (534 loc) • 24.1 kB • JavaScript
 */
import { createHash } from 'crypto';
import { PassThrough, Writable } from 'stream';
import { castArray } from 'lodash/fp';
import { ProviderValidationError, ProviderTransferError } from '../../../errors/providers.mjs';
import { TRANSFER_PATH } from '../../remote/constants.mjs';
import { decodeTransferAssetStreamItem } from '../../../utils/transfer-asset-chunk.mjs';
import { write } from '../../../utils/writable-async-write.mjs';
import { trimTrailingSlash, connectToWebsocket, createDispatcher } from '../utils.mjs';
/**
 * Guard used before every "loose" private field access.
 *
 * @param {object} receiver - Object the private field is read from or written to.
 * @param {string} privateKey - Generated property name that stores the private field.
 * @returns {object} `receiver`, unchanged, when it owns `privateKey`.
 * @throws {TypeError} When `receiver` has no own property named `privateKey`.
 */
function _class_private_field_loose_base(receiver, privateKey) {
    const ownsKey = Object.prototype.hasOwnProperty.call(receiver, privateKey);
    if (ownsKey) {
        return receiver;
    }
    throw new TypeError("attempted to use private field on non-instance");
}
// Module-wide counter; every generated private key consumes one value.
var id = 0;
/**
 * Builds the property name backing a "loose" private field.
 *
 * @param {string} name - Human-readable field name appended to the key.
 * @returns {string} `__private_<counter>_<name>`; the counter increases on every call.
 */
function _class_private_field_loose_key(name) {
    const sequence = id;
    id += 1;
    return "__private_" + sequence + "_" + name;
}
/**
 * Retry overrides passed along with the `start` message of the `assets` step.
 *
 * The pull server answers that message only after `estimateAssetTotals` (DB stream; remote sizes
 * from DB when complete, else HTTP like `createAssetsStream`). That can exceed the default
 * dispatcher wait (~30s between resends, a few minutes total), so this message uses a longer
 * window: large libraries must not fail with `Request timed out` before totals are returned.
 */
const ASSETS_START_RETRY_OVERRIDES = {
    // 120000 — presumably milliseconds (two minutes); confirm against the dispatcher.
    retryMessageTimeout: 2 * 60 * 1000,
    retryMessageMaxRetries: 30
};
// Generated property names for the provider's private fields and private methods.
// The declaration order is kept as-is because each call consumes one value of the key counter.
const _diagnostics = /*#__PURE__*/ _class_private_field_loose_key("_diagnostics");
const _pullAssetStreamWireSampleLogged = /*#__PURE__*/ _class_private_field_loose_key("_pullAssetStreamWireSampleLogged");
const _checksumsEnabled = /*#__PURE__*/ _class_private_field_loose_key("_checksumsEnabled");
/** Set from pull server `start` response for `assets` when present (for engine `getStageTotals`). */
const _cachedAssetsTotals = /*#__PURE__*/ _class_private_field_loose_key("_cachedAssetsTotals");
const _createStageReadStream = /*#__PURE__*/ _class_private_field_loose_key("_createStageReadStream");
const _reportInfo = /*#__PURE__*/ _class_private_field_loose_key("_reportInfo");
/** Reports a warning diagnostic (`kind: 'warning'`). Consumers (e.g. CLI) choose log levels and routing. */
const _reportWarning = /*#__PURE__*/ _class_private_field_loose_key("_reportWarning");
const _startStep = /*#__PURE__*/ _class_private_field_loose_key("_startStep");
const _respond = /*#__PURE__*/ _class_private_field_loose_key("_respond");
const _endStep = /*#__PURE__*/ _class_private_field_loose_key("_endStep");
/**
 * Transfer source provider that pulls data from a remote Strapi instance over a websocket.
 *
 * As implemented below: `bootstrap()` opens the websocket, creates the dispatcher and initializes
 * the transfer; the `create*ReadStream()` methods start a stage and expose its data as a stream;
 * `close()` dispatches the `close` action and then closes the websocket.
 */
class RemoteStrapiSourceProvider {
    /** Starts the `entities` stage; resolves with the object-mode stream of that stage. */
    createEntitiesReadStream() {
        return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('entities');
    }
    /** Starts the `links` stage; resolves with the object-mode stream of that stage. */
    createLinksReadStream() {
        return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('links');
    }
    /**
     * Starts the `assets` stage and resolves with an object-mode stream of asset entries.
     *
     * The remote stage stream carries batches of items with an `action` (`start`, `stream`, `end`) and
     * an `assetID`. For every `start` item an entry is written to the returned stream: the item's
     * `data` spread together with `stream` (a PassThrough receiving the decoded chunks), `status`,
     * `queue` and, when checksums are enabled, `checksumHash`. `stream` items are queued and written to
     * the asset's stream; the `end` item closes it (after checksum validation when enabled).
     *
     * An asset that sees neither a remote chunk nor a completed write for `options.streamTimeout` ms
     * has its stream destroyed with a timeout error.
     *
     * @returns {Promise<PassThrough>} Object-mode stream of asset entries.
     */
    async createAssetsReadStream() {
        // Create the streams used to transfer the assets
        const stream = await _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('assets');
        const pass = new PassThrough({
            objectMode: true
        });
        // Init the asset map
        const assets = {};
        // Watch for stalled assets: no remote chunks and no completed writes to the asset stream for streamTimeout ms
        const resetTimeout = (assetID)=>{
            if (!assets[assetID]) {
                return;
            }
            if (assets[assetID].timeout) {
                clearTimeout(assets[assetID].timeout);
            }
            assets[assetID].timeout = setTimeout(()=>{
                // The asset may have been closed and removed while the timer was pending
                if (!assets[assetID]) {
                    return;
                }
                _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Asset ${assetID} transfer stalled, aborting.`);
                assets[assetID].status = 'errored';
                assets[assetID].stream.destroy(new Error(`Asset ${assetID} transfer timed out`));
            }, this.options.streamTimeout);
        };
        // Cancels the pending stall timer of a single asset, if any
        const clearStallTimeoutForAsset = (assetID)=>{
            const entry = assets[assetID];
            if (entry?.timeout) {
                clearTimeout(entry.timeout);
                entry.timeout = undefined;
            }
        };
        // Cancels every pending stall timer (used when the pipeline ends or fails)
        const clearAllStallTimeouts = ()=>{
            for (const id of Object.keys(assets)){
                clearStallTimeoutForAsset(id);
            }
        };
        /**
         * Serialize asset batch handling: `Readable.on('data', async …)` does not apply backpressure,
         * so we pipe through a Writable with highWaterMark 1 so only one batch is in flight.
         */
        const processAssetPayload = async (payload)=>{
            for (const item of payload){
                const { action, assetID } = item;
                if (action === 'start') {
                    if (assets[assetID]) {
                        throw new Error(`Asset ${assetID} already started`);
                    }
                    _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Asset ${assetID} starting`);
                    assets[assetID] = {
                        ...item.data,
                        stream: new PassThrough(),
                        status: 'ok',
                        queue: [],
                        ..._class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] ? {
                            checksumHash: createHash('sha256')
                        } : {}
                    };
                    resetTimeout(assetID);
                    // Hand the asset entry to the consumer before any of its chunks are processed
                    await write(pass, assets[assetID]);
                } else if (action === 'stream' || action === 'end') {
                    if (!assets[assetID]) {
                        throw new Error(`No id matching ${assetID} for stream action`);
                    }
                    if (action === 'stream') {
                        // Inspect the wire format of the first stream chunk only
                        if (!_class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged]) {
                            _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = true;
                            const { data } = item;
                            // Same legacy shape `decodeTransferAssetStreamData` accepts after JSON.parse (proof, not frame-size guess).
                            const legacyBufferJson = data && typeof data === 'object' && !Buffer.isBuffer(data) && data.type === 'Buffer' && (Array.isArray(data.data) || ArrayBuffer.isView(data.data));
                            if (legacyBufferJson) {
                                _class_private_field_loose_base(this, _reportWarning)[_reportWarning]('[Data transfer][pull] Remote is using legacy Buffer JSON for asset chunks (each byte as a JSON number). That uses much more memory during JSON.parse than base64. Upgrade the remote Strapi to a version that sends base64 asset chunks, or out-of-memory errors may still happen on large files.');
                            }
                        }
                        resetTimeout(assetID);
                    } else {
                        clearTimeout(assets[assetID].timeout);
                    }
                    if (assets[assetID].status === 'closed') {
                        throw new Error(`Asset ${assetID} is closed`);
                    }
                    assets[assetID].queue.push(item);
                }
            }
            // Drain the queues filled above, one asset at a time
            for(const assetID in assets){
                if (Object.prototype.hasOwnProperty.call(assets, assetID)) {
                    const asset = assets[assetID];
                    if (asset.queue?.length > 0) {
                        await processQueue(assetID);
                    }
                }
            }
        };
        const processor = new Writable({
            objectMode: true,
            highWaterMark: 1,
            write (payload, _encoding, callback) {
                processAssetPayload(payload).then(()=>{
                    callback();
                }, (err)=>{
                    clearAllStallTimeouts();
                    stream.destroy(err);
                    callback(err);
                });
            },
            final (callback) {
                // All batches were processed: no more asset entries will be written
                pass.end();
                callback();
            }
        });
        processor.on('error', (err)=>{
            clearAllStallTimeouts();
            pass.destroy(err);
        });
        stream.on('error', (err)=>{
            clearAllStallTimeouts();
            processor.destroy(err);
            pass.destroy(err);
        });
        stream.once('end', ()=>{
            clearAllStallTimeouts();
        });
        stream.pipe(processor);
        /**
         * Start processing the queue for a given assetID
         *
         * Even though this is a loop that attempts to process the entire queue, it is safe to call this more than once
         * for the same asset id because the queue is shared globally, the items are shifted off, and immediately written
         */
        const processQueue = async (id)=>{
            if (!assets[id]) {
                throw new Error(`Failed to write asset chunk for "${id}". Asset not found.`);
            }
            const asset = assets[id];
            const { status: currentStatus } = asset;
            if ([
                'closed',
                'errored'
            ].includes(currentStatus)) {
                throw new Error(`Failed to write asset chunk for "${id}". The asset is currently "${currentStatus}"`);
            }
            while(asset.queue.length > 0){
                const data = asset.queue.shift();
                if (!data) {
                    throw new Error(`Invalid chunk found for ${id}`);
                }
                try {
                    // if this is an end chunk, close the asset stream
                    if (data.action === 'end') {
                        _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Ending asset stream for ${id}`);
                        await closeAssetStream(id, data.checksum);
                        break; // Exit the loop after closing the stream
                    }
                    // Save the current chunk
                    await writeChunkToStream(id, data);
                } catch (error) {
                    if (!assets[id]) {
                        throw new Error(`No id matching ${id} for writeAssetChunk`);
                    }
                    clearStallTimeoutForAsset(id);
                    if (error instanceof Error) {
                        throw error;
                    }
                    throw new Error(`Unexpected error while processing asset chunk for "${id}"`);
                }
            }
        };
        /**
         * Writes a chunk of data to the asset's stream.
         *
         * Only check if the targeted asset exists, no other validation is done.
         */
        const writeChunkToStream = async (id, item)=>{
            const asset = assets[id];
            if (!asset) {
                throw new Error(`Failed to write asset chunk for "${id}". Asset not found.`);
            }
            if (item.action !== 'stream') {
                throw new Error(`Expected stream queue item for "${id}"`);
            }
            const chunk = decodeTransferAssetStreamItem(item);
            asset.checksumHash?.update(chunk);
            await write(asset.stream, chunk);
            // Count slow draining as progress so backpressure on large chunks does not trip the stall timer
            resetTimeout(id);
        };
        /**
         * Closes the asset stream associated with the given ID.
         *
         * It deletes the stream for the asset upon successful closure.
         */
        const closeAssetStream = async (id, checksum)=>{
            if (!assets[id]) {
                throw new Error(`Failed to close asset "${id}". Asset not found.`);
            }
            const asset = assets[id];
            // The queue processes stream chunks before `end`; the last `writeChunkToStream` calls
            // `resetTimeout` after the `end` chunk already cleared the timer — clear again before closing.
            clearStallTimeoutForAsset(id);
            if (_class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled]) {
                if (!checksum) {
                    throw new ProviderTransferError(`Asset ${id} is missing checksum in transfer end payload`);
                }
                if (checksum.algorithm !== 'sha256') {
                    throw new ProviderTransferError(`Asset ${id} checksum algorithm "${checksum.algorithm}" is not supported`);
                }
                const actual = asset.checksumHash?.digest('hex');
                if (!actual || actual !== checksum.value) {
                    throw new ProviderTransferError(`Checksum mismatch for asset "${id}" (expected ${checksum.value}, got ${actual ?? 'none'})`);
                }
            }
            asset.status = 'closed';
            await new Promise((resolve, reject)=>{
                const { stream: assetStream } = asset;
                assetStream.on('close', ()=>{
                    delete assets[id];
                    resolve();
                }).on('error', (e)=>{
                    delete assets[id];
                    reject(new Error(`Failed to close asset "${id}". Asset stream error: ${e.toString()}`));
                }).end();
            });
        };
        return pass;
    }
    /** Starts the `configuration` stage; resolves with the object-mode stream of that stage. */
    createConfigurationReadStream() {
        return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('configuration');
    }
    /**
     * Dispatches the `getMetadata` transfer action.
     *
     * @returns {Promise<object|null>} The dispatcher's answer, or `null` when there is no dispatcher
     * or the answer is nullish.
     */
    async getMetadata() {
        const metadata = await this.dispatcher?.dispatchTransferAction('getMetadata');
        return metadata ?? null;
    }
    /**
     * Ensures the remote URL uses `https:` or `http:`.
     *
     * @param {URL} url - Remote URL (only `protocol` is read).
     * @throws {ProviderValidationError} When the protocol is anything else.
     */
    assertValidProtocol(url) {
        const validProtocols = [
            'https:',
            'http:'
        ];
        if (!validProtocols.includes(url.protocol)) {
            throw new ProviderValidationError(`Invalid protocol "${url.protocol}"`, {
                check: 'url',
                details: {
                    protocol: url.protocol,
                    validProtocols
                }
            });
        }
    }
    /**
     * Dispatches the `init` command and records whether checksum validation was negotiated.
     *
     * Checksum params are sent only when `options.verifyChecksums === true`; checksums end up enabled
     * only if the response also carries `checksums === true`, otherwise a warning is reported.
     *
     * @returns {Promise<string>} The `transferID` from the response.
     * @throws {ProviderTransferError} When the response has no `transferID`.
     */
    async initTransfer() {
        const wantsChecksums = this.options.verifyChecksums === true;
        const query = this.dispatcher?.dispatchCommand({
            command: 'init',
            ...wantsChecksums ? {
                params: {
                    transfer: 'pull',
                    checksums: true
                }
            } : {}
        });
        const res = await query;
        if (!res?.transferID) {
            throw new ProviderTransferError('Init failed, invalid response from the server');
        }
        _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = wantsChecksums && res.checksums === true;
        if (wantsChecksums && res.checksums !== true) {
            _class_private_field_loose_base(this, _reportWarning)[_reportWarning]('[Data transfer][pull] Checksums were requested but the remote does not support checksum negotiation; continuing without checksum validation.');
        }
        return res.transferID;
    }
    /**
     * Connects to the remote pull endpoint and initializes the transfer.
     *
     * Steps: validate the URL protocol, open the websocket (with a `Bearer` token header when
     * `options.auth.type === 'token'`, without headers when no auth is configured), create the
     * dispatcher, run `initTransfer()`, set the transfer properties and dispatch `bootstrap`.
     *
     * @param {object} diagnostics - Reporter stored and used by the info/warning helpers.
     * @throws {ProviderValidationError} On an invalid protocol or an unsupported auth type.
     */
    async bootstrap(diagnostics) {
        _class_private_field_loose_base(this, _diagnostics)[_diagnostics] = diagnostics;
        const { url, auth } = this.options;
        let ws;
        this.assertValidProtocol(url);
        const wsProtocol = url.protocol === 'https:' ? 'wss:' : 'ws:';
        const wsUrl = `${wsProtocol}//${url.host}${trimTrailingSlash(url.pathname)}${TRANSFER_PATH}/pull`;
        _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = false;
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('establishing websocket connection');
        // No auth defined, trying public access for transfer
        if (!auth) {
            ws = await connectToWebsocket(wsUrl, undefined, _class_private_field_loose_base(this, _diagnostics)[_diagnostics]);
        } else if (auth.type === 'token') {
            const headers = {
                Authorization: `Bearer ${auth.token}`
            };
            ws = await connectToWebsocket(wsUrl, {
                headers
            }, _class_private_field_loose_base(this, _diagnostics)[_diagnostics]);
        } else {
            throw new ProviderValidationError('Auth method not available', {
                check: 'auth.type',
                details: {
                    auth: auth.type
                }
            });
        }
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('established websocket connection');
        this.ws = ws;
        const { retryMessageOptions } = this.options;
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('creating dispatcher');
        this.dispatcher = createDispatcher(this.ws, retryMessageOptions, (message)=>_class_private_field_loose_base(this, _reportInfo)[_reportInfo](message));
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('created dispatcher');
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('initialize transfer');
        const transferID = await this.initTransfer();
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`initialized transfer ${transferID}`);
        this.dispatcher.setTransferProperties({
            id: transferID,
            kind: 'pull'
        });
        await this.dispatcher.dispatchTransferAction('bootstrap');
    }
    /**
     * Dispatches the `close` transfer action, then closes the websocket and waits for its `close`
     * event. Resolves immediately when there is no websocket or it is already closed.
     */
    async close() {
        await this.dispatcher?.dispatchTransferAction('close');
        await new Promise((resolve)=>{
            const { ws } = this;
            // `CLOSED` is the ready-state constant (a non-zero number, hence always truthy): it has to be
            // compared with `readyState`, otherwise the socket is never actually closed.
            if (!ws || ws.readyState === ws.CLOSED) {
                resolve();
                return;
            }
            ws.once('close', ()=>resolve());
            ws.close();
        });
    }
    /**
     * Dispatches the `getSchemas` transfer action.
     *
     * @returns {Promise<object|null>} The dispatcher's answer, or `null` when there is no dispatcher
     * or the answer is nullish.
     */
    async getSchemas() {
        const schemas = await this.dispatcher?.dispatchTransferAction('getSchemas');
        return schemas ?? null;
    }
    /**
     * Returns the totals cached from the `assets` stage start response.
     *
     * @param {string} stage - Stage name; only `assets` can have totals.
     * @returns {Promise<object|null>} Cached totals, or `null` for other stages or when none are cached.
     */
    async getStageTotals(stage) {
        if (stage !== 'assets') {
            return null;
        }
        const cached = _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals];
        return cached ?? null;
    }
    /**
     * @param {object} options - Provider options, merged over `defaultOptions`. Options read in this
     * class: `url`, `auth`, `retryMessageOptions`, `streamTimeout`, `verifyChecksums`.
     */
    constructor(options){
        // Private methods (non-writable)
        Object.defineProperty(this, _createStageReadStream, {
            value: createStageReadStream
        });
        Object.defineProperty(this, _reportInfo, {
            value: reportInfo
        });
        Object.defineProperty(this, _reportWarning, {
            value: reportWarning
        });
        Object.defineProperty(this, _startStep, {
            value: startStep
        });
        Object.defineProperty(this, _respond, {
            value: respond
        });
        Object.defineProperty(this, _endStep, {
            value: endStep
        });
        // Private fields (writable)
        Object.defineProperty(this, _diagnostics, {
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, _pullAssetStreamWireSampleLogged, {
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, _checksumsEnabled, {
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, _cachedAssetsTotals, {
            writable: true,
            value: void 0
        });
        this.name = 'source::remote-strapi';
        this.type = 'source';
        this.defaultOptions = {
            // Large files + JSON/WS backpressure can go minutes between *messages* while bytes still drain locally
            streamTimeout: 300000
        };
        _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = false;
        _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = false;
        this.options = {
            ...this.defaultOptions,
            ...options
        };
        // Provisional value; `initTransfer()` overwrites it with the negotiated result
        _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = this.options.verifyChecksums === true;
        this.ws = null;
        this.dispatcher = null;
    }
}
/**
 * Starts a transfer stage on the remote and exposes its data as an object-mode stream.
 *
 * Called with the provider as `this`. After the `start` step succeeds, websocket messages are handled
 * one at a time (`once` + re-registration): messages that are not `transfer` messages for this
 * process id are ignored; every handled message is acknowledged with its `uuid`; `data` items are
 * pushed to the stream; `ended` runs the `end` step and ends the stream; `error` destroys it.
 *
 * For the `assets` stage, totals returned by the start response are cached on the provider.
 *
 * @param {string} stage - Stage name sent to the remote (`entities`, `links`, `assets`, `configuration`).
 * @returns {Promise<PassThrough>} Object-mode stream of the stage data.
 * @throws {Error} The error produced by the start step, or a `ProviderTransferError` when the start
 * step yields no result at all.
 */
async function createStageReadStream(stage) {
    if (stage === 'assets') {
        _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals] = undefined;
    }
    const startResult = await _class_private_field_loose_base(this, _startStep)[_startStep](stage);
    if (startResult instanceof Error) {
        throw startResult;
    }
    // A missing dispatcher (or an empty answer) yields a nullish result: fail with an explicit error
    // rather than an opaque destructuring TypeError.
    if (startResult == null) {
        throw new ProviderTransferError(`Unable to start the "${stage}" stage: no response to the start step`);
    }
    const { id: processID, totals } = startResult;
    if (stage === 'assets' && totals && (totals.totalBytes != null || totals.totalCount != null)) {
        _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals] = totals;
    }
    // Default object-mode HWM (~16 chunks). Do not await `drain` on manual `push` while `pipe()`
    // is attached — drain/`readableLength` races reliably deadlock after a few 1MiB asset frames.
    // Backpressure for pull assets is enforced by the `highWaterMark: 1` Writable in `createAssetsReadStream`.
    const stream = new PassThrough({
        objectMode: true
    });
    const listener = async (raw)=>{
        try {
            const parsed = JSON.parse(raw.toString());
            // If not a message related to our transfer process, ignore it
            if (!parsed.uuid || parsed?.data?.type !== 'transfer' || parsed?.data?.id !== processID) {
                this.ws?.once('message', listener);
                return;
            }
            const { uuid, data: message } = parsed;
            const { ended, error, data } = message;
            if (error) {
                await _class_private_field_loose_base(this, _respond)[_respond](uuid);
                stream.destroy(error);
                return;
            }
            if (ended) {
                await _class_private_field_loose_base(this, _respond)[_respond](uuid);
                await _class_private_field_loose_base(this, _endStep)[_endStep](stage);
                stream.end();
                return;
            }
            for (const item of castArray(data)){
                stream.push(item);
            }
            // Re-register before acknowledging so the next message cannot be missed
            this.ws?.once('message', listener);
            await _class_private_field_loose_base(this, _respond)[_respond](uuid);
        } catch (err) {
            // Nothing awaits this listener: without this, a malformed message or a failed ack would be
            // an unhandled rejection and the consumer would wait on the stream forever.
            stream.destroy(err instanceof Error ? err : new ProviderTransferError(String(err)));
        }
    };
    this.ws?.once('message', listener);
    return stream;
}
/**
 * Reports an informational diagnostic (`kind: 'info'`) through the provider's diagnostics reporter.
 * Does nothing when no reporter has been set. Called with the provider as `this`.
 *
 * @param {string} message - Text of the diagnostic.
 */
function reportInfo(message) {
    const diagnostics = _class_private_field_loose_base(this, _diagnostics)[_diagnostics];
    if (diagnostics === undefined || diagnostics === null) {
        return;
    }
    diagnostics.report({
        details: {
            createdAt: new Date(),
            message,
            origin: 'remote-source-provider'
        },
        kind: 'info'
    });
}
/**
 * Reports a warning diagnostic (`kind: 'warning'`) through the provider's diagnostics reporter.
 * Does nothing when no reporter has been set. Called with the provider as `this`.
 *
 * @param {string} message - Text of the diagnostic.
 */
function reportWarning(message) {
    const diagnostics = _class_private_field_loose_base(this, _diagnostics)[_diagnostics];
    if (diagnostics === undefined || diagnostics === null) {
        return;
    }
    diagnostics.report({
        details: {
            createdAt: new Date(),
            message,
            origin: 'remote-source-provider'
        },
        kind: 'warning'
    });
}
/**
 * Dispatches the `start` action for a transfer step. Called with the provider as `this`.
 *
 * Failures are returned, not thrown: the caller receives either the dispatcher's answer
 * (`undefined` when there is no dispatcher) or an `Error`.
 *
 * @param {string} step - Step name; `assets` is dispatched with `ASSETS_START_RETRY_OVERRIDES`.
 * @returns {Promise<object|Error|undefined>} Dispatcher answer or the error that occurred.
 */
async function startStep(step) {
    try {
        const message = {
            action: 'start',
            step
        };
        if (step === 'assets') {
            return await this.dispatcher?.dispatchTransferStep(message, {
                retryOverrides: ASSETS_START_RETRY_OVERRIDES
            });
        }
        return await this.dispatcher?.dispatchTransferStep(message, undefined);
    } catch (e) {
        if (e instanceof Error) {
            return e;
        }
        // Non-Error throwables are wrapped; only strings keep their content
        const reason = typeof e === 'string' ? e : 'Unexpected error';
        return new ProviderTransferError(reason);
    }
}
/**
 * Acknowledges a remote message by sending `{ uuid }` back over the websocket.
 * Called with the provider as `this`.
 *
 * @param {string} uuid - Identifier of the message being acknowledged.
 * @returns {Promise<void>} Resolves once the send callback reports success.
 * @throws {Error} Rejects with the send error, or with an explicit error when the provider has no
 * websocket (previously the promise never settled in that case, hanging the caller).
 */
async function respond(uuid) {
    return new Promise((resolve, reject)=>{
        const { ws } = this;
        if (!ws) {
            reject(new Error('Unable to respond to the remote: no websocket connection available'));
            return;
        }
        ws.send(JSON.stringify({
            uuid
        }), (e)=>{
            if (e) {
                reject(e);
            } else {
                resolve();
            }
        });
    });
}
/**
 * Dispatches the `end` action for a transfer step. Called with the provider as `this`.
 *
 * Failures are returned, not thrown.
 *
 * @param {string} step - Name of the step to end.
 * @returns {Promise<Error|null>} `null` on success (or when there is no dispatcher), otherwise the error.
 */
async function endStep(step) {
    const message = {
        action: 'end',
        step
    };
    try {
        await this.dispatcher?.dispatchTransferStep(message);
        return null;
    } catch (e) {
        if (e instanceof Error) {
            return e;
        }
        // Non-Error throwables are wrapped; only strings keep their content
        const reason = typeof e === 'string' ? e : 'Unexpected error';
        return new ProviderTransferError(reason);
    }
}
/**
 * Factory for the remote Strapi source provider.
 *
 * @param {object} options - Options forwarded untouched to the `RemoteStrapiSourceProvider` constructor.
 * @returns {RemoteStrapiSourceProvider} A new provider instance.
 */
const createRemoteStrapiSourceProvider = (options)=>new RemoteStrapiSourceProvider(options);
export { createRemoteStrapiSourceProvider };
//# sourceMappingURL=index.mjs.map