UNPKG

@strapi/data-transfer

Version:

Data transfer capabilities for Strapi

537 lines (534 loc) 24.1 kB
import { createHash } from 'crypto';
import { PassThrough, Writable } from 'stream';
import { castArray } from 'lodash/fp';
import { ProviderValidationError, ProviderTransferError } from '../../../errors/providers.mjs';
import { TRANSFER_PATH } from '../../remote/constants.mjs';
import { decodeTransferAssetStreamItem } from '../../../utils/transfer-asset-chunk.mjs';
import { write } from '../../../utils/writable-async-write.mjs';
import { trimTrailingSlash, connectToWebsocket, createDispatcher } from '../utils.mjs';

/**
 * Transpiler helper: guards access to a "loose" private field.
 *
 * @param {object} receiver - Object the private field is read from or written to.
 * @param {string} privateKey - Generated property name of the private field.
 * @returns {object} `receiver`, so the call can be chained with `[privateKey]`.
 * @throws {TypeError} When `receiver` does not own `privateKey`.
 */
function _class_private_field_loose_base(receiver, privateKey) {
  if (!Object.prototype.hasOwnProperty.call(receiver, privateKey)) {
    throw new TypeError("attempted to use private field on non-instance");
  }
  return receiver;
}

// Monotonic counter that keeps generated private-field keys unique within this module.
let id = 0;

/**
 * Transpiler helper: builds the property name used to store a "loose" private field.
 *
 * @param {string} name - Human readable field name.
 * @returns {string} A key of the form `__private_<n>_<name>`.
 */
function _class_private_field_loose_key(name) {
  return `__private_${id++}_${name}`;
}

/**
 * Pull server answers `assets` step `start` only after `estimateAssetTotals` (DB stream; remote sizes from DB when complete, else HTTP like `createAssetsStream`).
 * That can exceed the default dispatcher wait (~30s between resends, a few minutes total). This message
 * uses a longer window so large libraries do not fail with `Request timed out` before totals are returned.
 */
const ASSETS_START_RETRY_OVERRIDES = {
  retryMessageTimeout: 120000,
  retryMessageMaxRetries: 30
};

// Private-field keys. The declaration order determines the numeric suffix of each key, so keep it stable.
const _diagnostics = /*#__PURE__*/ _class_private_field_loose_key("_diagnostics");
const _pullAssetStreamWireSampleLogged = /*#__PURE__*/ _class_private_field_loose_key("_pullAssetStreamWireSampleLogged");
const _checksumsEnabled = /*#__PURE__*/ _class_private_field_loose_key("_checksumsEnabled");
/** Set from pull server `start` response for `assets` when present (for engine `getStageTotals`). */
const _cachedAssetsTotals = /*#__PURE__*/ _class_private_field_loose_key("_cachedAssetsTotals");
const _createStageReadStream = /*#__PURE__*/ _class_private_field_loose_key("_createStageReadStream");
const _reportInfo = /*#__PURE__*/ _class_private_field_loose_key("_reportInfo");
/** Reports a warning diagnostic (`kind: 'warning'`). Consumers (e.g. CLI) choose log levels and routing. */
const _reportWarning = /*#__PURE__*/ _class_private_field_loose_key("_reportWarning");
const _startStep = /*#__PURE__*/ _class_private_field_loose_key("_startStep");
const _respond = /*#__PURE__*/ _class_private_field_loose_key("_respond");
const _endStep = /*#__PURE__*/ _class_private_field_loose_key("_endStep");

/**
 * Source provider that pulls transfer stages (entities, links, assets, configuration)
 * over a websocket connection opened in `bootstrap()`.
 */
class RemoteStrapiSourceProvider {
  /**
   * @returns {Promise<PassThrough>} Object-mode stream of the `entities` stage.
   */
  createEntitiesReadStream() {
    return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('entities');
  }

  /**
   * @returns {Promise<PassThrough>} Object-mode stream of the `links` stage.
   */
  createLinksReadStream() {
    return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('links');
  }

  /**
   * Builds the asset stream: each item written to the returned stream is an asset
   * descriptor (the remote `start` data plus `stream`, `status`, `queue`, and optionally
   * `checksumHash`), whose `stream` receives the decoded chunks.
   *
   * @returns {Promise<PassThrough>} Object-mode stream of asset descriptors.
   */
  async createAssetsReadStream() {
    // Create the streams used to transfer the assets
    const stream = await _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('assets');
    const pass = new PassThrough({ objectMode: true });

    // Init the asset map
    const assets = {};

    // Watch for stalled assets: no remote chunks and no completed writes to the asset stream for streamTimeout ms
    const resetTimeout = (assetID) => {
      if (!assets[assetID]) {
        return;
      }
      if (assets[assetID].timeout) {
        clearTimeout(assets[assetID].timeout);
      }
      assets[assetID].timeout = setTimeout(() => {
        // The asset may have been closed and removed while the timer was pending
        if (!assets[assetID]) {
          return;
        }
        _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Asset ${assetID} transfer stalled, aborting.`);
        assets[assetID].status = 'errored';
        assets[assetID].stream.destroy(new Error(`Asset ${assetID} transfer timed out`));
      }, this.options.streamTimeout);
    };

    // Cancels the stall timer of a single asset (no-op when the asset or timer is absent)
    const clearStallTimeoutForAsset = (assetID) => {
      const entry = assets[assetID];
      if (entry?.timeout) {
        clearTimeout(entry.timeout);
        entry.timeout = undefined;
      }
    };

    // Cancels the stall timers of every asset still tracked
    const clearAllStallTimeouts = () => {
      for (const id of Object.keys(assets)) {
        clearStallTimeoutForAsset(id);
      }
    };

    /**
     * Serialize asset batch handling: `Readable.on('data', async …)` does not apply backpressure,
     * so we pipe through a Writable with highWaterMark 1 so only one batch is in flight.
     */
    const processAssetPayload = async (payload) => {
      for (const item of payload) {
        const { action, assetID } = item;

        if (action === 'start') {
          if (assets[assetID]) {
            throw new Error(`Asset ${assetID} already started`);
          }
          _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Asset ${assetID} starting`);
          assets[assetID] = {
            ...item.data,
            stream: new PassThrough(),
            status: 'ok',
            queue: [],
            // Only pay for hashing when checksum validation was negotiated
            ..._class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] ? {
              checksumHash: createHash('sha256')
            } : {}
          };
          resetTimeout(assetID);
          await write(pass, assets[assetID]);
        } else if (action === 'stream' || action === 'end') {
          if (!assets[assetID]) {
            throw new Error(`No id matching ${assetID} for stream action`);
          }

          if (action === 'stream') {
            // Inspect only the first stream chunk of the transfer to detect the legacy wire format
            if (!_class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged]) {
              _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = true;
              const { data } = item;
              // Same legacy shape `decodeTransferAssetStreamData` accepts after JSON.parse (proof, not frame-size guess).
              const legacyBufferJson = data && typeof data === 'object' && !Buffer.isBuffer(data) && data.type === 'Buffer' && (Array.isArray(data.data) || ArrayBuffer.isView(data.data));
              if (legacyBufferJson) {
                _class_private_field_loose_base(this, _reportWarning)[_reportWarning]('[Data transfer][pull] Remote is using legacy Buffer JSON for asset chunks (each byte as a JSON number). That uses much more memory during JSON.parse than base64. Upgrade the remote Strapi to a version that sends base64 asset chunks, or out-of-memory errors may still happen on large files.');
              }
            }
            resetTimeout(assetID);
          } else {
            clearTimeout(assets[assetID].timeout);
          }

          if (assets[assetID].status === 'closed') {
            throw new Error(`Asset ${assetID} is closed`);
          }
          assets[assetID].queue.push(item);
        }
      }

      // Flush every asset that has pending chunks. `for...in` is kept on purpose: assets deleted
      // by `closeAssetStream` while iterating are skipped instead of being visited as `undefined`.
      for (const assetID in assets) {
        if (Object.prototype.hasOwnProperty.call(assets, assetID)) {
          const asset = assets[assetID];
          if (asset.queue?.length > 0) {
            await processQueue(assetID);
          }
        }
      }
    };

    const processor = new Writable({
      objectMode: true,
      highWaterMark: 1,
      write(payload, _encoding, callback) {
        processAssetPayload(payload).then(() => {
          callback();
        }, (err) => {
          clearAllStallTimeouts();
          stream.destroy(err);
          callback(err);
        });
      },
      final(callback) {
        pass.end();
        callback();
      }
    });

    processor.on('error', (err) => {
      clearAllStallTimeouts();
      pass.destroy(err);
    });
    stream.on('error', (err) => {
      clearAllStallTimeouts();
      processor.destroy(err);
      pass.destroy(err);
    });
    stream.once('end', () => {
      clearAllStallTimeouts();
    });
    stream.pipe(processor);

    /**
     * Start processing the queue for a given assetID
     *
     * Even though this is a loop that attempts to process the entire queue, it is safe to call this more than once
     * for the same asset id because the queue is shared globally, the items are shifted off, and immediately written
     */
    const processQueue = async (id) => {
      if (!assets[id]) {
        throw new Error(`Failed to write asset chunk for "${id}". Asset not found.`);
      }
      const asset = assets[id];
      const { status: currentStatus } = asset;
      if (['closed', 'errored'].includes(currentStatus)) {
        throw new Error(`Failed to write asset chunk for "${id}". The asset is currently "${currentStatus}"`);
      }

      while (asset.queue.length > 0) {
        const data = asset.queue.shift();
        if (!data) {
          throw new Error(`Invalid chunk found for ${id}`);
        }
        try {
          // if this is an end chunk, close the asset stream
          if (data.action === 'end') {
            _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`Ending asset stream for ${id}`);
            await closeAssetStream(id, data.checksum);
            break; // Exit the loop after closing the stream
          }
          // Save the current chunk
          await writeChunkToStream(id, data);
        } catch (error) {
          if (!assets[id]) {
            throw new Error(`No id matching ${id} for writeAssetChunk`);
          }
          clearStallTimeoutForAsset(id);
          if (error instanceof Error) {
            throw error;
          }
          throw new Error(`Unexpected error while processing asset chunk for "${id}"`);
        }
      }
    };

    /**
     * Writes a chunk of data to the asset's stream.
     *
     * Only check if the targeted asset exists, no other validation is done.
     */
    const writeChunkToStream = async (id, item) => {
      const asset = assets[id];
      if (!asset) {
        throw new Error(`Failed to write asset chunk for "${id}". Asset not found.`);
      }
      if (item.action !== 'stream') {
        throw new Error(`Expected stream queue item for "${id}"`);
      }
      const chunk = decodeTransferAssetStreamItem(item);
      asset.checksumHash?.update(chunk);
      await write(asset.stream, chunk);
      // Count slow draining as progress so backpressure on large chunks does not trip the stall timer
      resetTimeout(id);
    };

    /**
     * Closes the asset stream associated with the given ID.
     *
     * It deletes the stream for the asset upon successful closure.
     */
    const closeAssetStream = async (id, checksum) => {
      if (!assets[id]) {
        throw new Error(`Failed to close asset "${id}". Asset not found.`);
      }
      const asset = assets[id];

      // The queue processes stream chunks before `end`; the last `writeChunkToStream` calls
      // `resetTimeout` after the `end` chunk already cleared the timer — clear again before closing.
      clearStallTimeoutForAsset(id);

      if (_class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled]) {
        if (!checksum) {
          throw new ProviderTransferError(`Asset ${id} is missing checksum in transfer end payload`);
        }
        if (checksum.algorithm !== 'sha256') {
          throw new ProviderTransferError(`Asset ${id} checksum algorithm "${checksum.algorithm}" is not supported`);
        }
        const actual = asset.checksumHash?.digest('hex');
        if (!actual || actual !== checksum.value) {
          throw new ProviderTransferError(`Checksum mismatch for asset "${id}" (expected ${checksum.value}, got ${actual ?? 'none'})`);
        }
      }

      asset.status = 'closed';
      await new Promise((resolve, reject) => {
        const { stream } = asset;
        stream.on('close', () => {
          delete assets[id];
          resolve();
        }).on('error', (e) => {
          delete assets[id];
          reject(new Error(`Failed to close asset "${id}". Asset stream error: ${e.toString()}`));
        }).end();
      });
    };

    return pass;
  }

  /**
   * @returns {Promise<PassThrough>} Object-mode stream of the `configuration` stage.
   */
  createConfigurationReadStream() {
    return _class_private_field_loose_base(this, _createStageReadStream)[_createStageReadStream]('configuration');
  }

  /**
   * @returns {Promise<*>} Result of the remote `getMetadata` action, or `null` when it is nullish
   *   or no dispatcher exists yet.
   */
  async getMetadata() {
    const metadata = await this.dispatcher?.dispatchTransferAction('getMetadata');
    return metadata ?? null;
  }

  /**
   * Ensures the remote URL uses `http:` or `https:`.
   *
   * @param {URL} url - Remote URL (only `protocol` is read).
   * @throws {ProviderValidationError} When the protocol is anything else.
   */
  assertValidProtocol(url) {
    const validProtocols = ['https:', 'http:'];
    if (!validProtocols.includes(url.protocol)) {
      throw new ProviderValidationError(`Invalid protocol "${url.protocol}"`, {
        check: 'url',
        details: {
          protocol: url.protocol,
          validProtocols
        }
      });
    }
  }

  /**
   * Sends the `init` command and records whether checksum validation was negotiated.
   *
   * @returns {Promise<*>} The `transferID` returned by the remote.
   * @throws {ProviderTransferError} When the response carries no `transferID`.
   */
  async initTransfer() {
    const wantsChecksums = this.options.verifyChecksums === true;
    const query = this.dispatcher?.dispatchCommand({
      command: 'init',
      // `params` are only sent when checksums are requested
      ...wantsChecksums ? {
        params: {
          transfer: 'pull',
          checksums: true
        }
      } : {}
    });
    const res = await query;
    if (!res?.transferID) {
      throw new ProviderTransferError('Init failed, invalid response from the server');
    }

    _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = wantsChecksums && res.checksums === true;
    if (wantsChecksums && res.checksums !== true) {
      _class_private_field_loose_base(this, _reportWarning)[_reportWarning]('[Data transfer][pull] Checksums were requested but the remote does not support checksum negotiation; continuing without checksum validation.');
    }
    return res.transferID;
  }

  /**
   * Opens the websocket, creates the dispatcher, initializes the transfer and sends `bootstrap`.
   *
   * @param {object} diagnostics - Reporter stored for later use; its `report` method is called
   *   by the info/warning helpers.
   * @throws {ProviderValidationError} On an invalid URL protocol or an unsupported `auth.type`.
   */
  async bootstrap(diagnostics) {
    _class_private_field_loose_base(this, _diagnostics)[_diagnostics] = diagnostics;
    const { url, auth } = this.options;
    let ws;

    this.assertValidProtocol(url);
    const wsProtocol = url.protocol === 'https:' ? 'wss:' : 'ws:';
    const wsUrl = `${wsProtocol}//${url.host}${trimTrailingSlash(url.pathname)}${TRANSFER_PATH}/pull`;

    _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = false;
    _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('establishing websocket connection');

    // No auth defined, trying public access for transfer
    if (!auth) {
      ws = await connectToWebsocket(wsUrl, undefined, _class_private_field_loose_base(this, _diagnostics)[_diagnostics]);
    } else if (auth.type === 'token') {
      const headers = {
        Authorization: `Bearer ${auth.token}`
      };
      ws = await connectToWebsocket(wsUrl, { headers }, _class_private_field_loose_base(this, _diagnostics)[_diagnostics]);
    } else {
      throw new ProviderValidationError('Auth method not available', {
        check: 'auth.type',
        details: {
          auth: auth.type
        }
      });
    }

    _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('established websocket connection');
    this.ws = ws;

    const { retryMessageOptions } = this.options;
    _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('creating dispatcher');
    this.dispatcher = createDispatcher(this.ws, retryMessageOptions, (message) => _class_private_field_loose_base(this, _reportInfo)[_reportInfo](message));
    _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('created dispatcher');

    _class_private_field_loose_base(this, _reportInfo)[_reportInfo]('initialize transfer');
    const transferID = await this.initTransfer();
    _class_private_field_loose_base(this, _reportInfo)[_reportInfo](`initialized transfer ${transferID}`);

    this.dispatcher.setTransferProperties({
      id: transferID,
      kind: 'pull'
    });
    await this.dispatcher.dispatchTransferAction('bootstrap');
  }

  /**
   * Sends the remote `close` action, then closes the websocket and waits for its `close` event.
   */
  async close() {
    await this.dispatcher?.dispatchTransferAction('close');
    await new Promise((resolve) => {
      const { ws } = this;
      // `ws.CLOSED` is the numeric ready-state constant and is always truthy, so testing it
      // directly skipped the close below every time. Compare it with the live `readyState`.
      // NOTE(review): assumes `connectToWebsocket` returns a WebSocket-API object exposing
      // `readyState` and `CLOSED` — confirm against `../utils.mjs`.
      if (!ws || ws.readyState === ws.CLOSED) {
        resolve();
        return;
      }
      ws.once('close', () => resolve()).close();
    });
  }

  /**
   * @returns {Promise<*>} Result of the remote `getSchemas` action, or `null` when it is nullish
   *   or no dispatcher exists yet.
   */
  async getSchemas() {
    const schemas = await this.dispatcher?.dispatchTransferAction('getSchemas');
    return schemas ?? null;
  }

  /**
   * @param {string} stage - Stage name; only `'assets'` can have totals.
   * @returns {Promise<object|null>} Totals cached from the last `assets` start response, else `null`.
   */
  async getStageTotals(stage) {
    if (stage !== 'assets') {
      return null;
    }
    const cached = _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals];
    return cached ?? null;
  }

  /**
   * @param {object} options - Provider options, merged over `defaultOptions`. This class reads
   *   `url`, `auth`, `streamTimeout`, `retryMessageOptions` and `verifyChecksums`.
   */
  constructor(options) {
    // Private methods (shared function objects, invoked with `this` bound to the instance)
    Object.defineProperty(this, _createStageReadStream, { value: createStageReadStream });
    Object.defineProperty(this, _reportInfo, { value: reportInfo });
    Object.defineProperty(this, _reportWarning, { value: reportWarning });
    Object.defineProperty(this, _startStep, { value: startStep });
    Object.defineProperty(this, _respond, { value: respond });
    Object.defineProperty(this, _endStep, { value: endStep });

    // Private mutable state
    Object.defineProperty(this, _diagnostics, { writable: true, value: void 0 });
    Object.defineProperty(this, _pullAssetStreamWireSampleLogged, { writable: true, value: void 0 });
    Object.defineProperty(this, _checksumsEnabled, { writable: true, value: void 0 });
    Object.defineProperty(this, _cachedAssetsTotals, { writable: true, value: void 0 });

    this.name = 'source::remote-strapi';
    this.type = 'source';
    this.defaultOptions = {
      // Large files + JSON/WS backpressure can go minutes between *messages* while bytes still drain locally
      streamTimeout: 300000
    };

    _class_private_field_loose_base(this, _pullAssetStreamWireSampleLogged)[_pullAssetStreamWireSampleLogged] = false;
    _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = false;

    this.options = {
      ...this.defaultOptions,
      ...options
    };
    // Provisional value; `initTransfer()` overwrites it with the negotiated result
    _class_private_field_loose_base(this, _checksumsEnabled)[_checksumsEnabled] = this.options.verifyChecksums === true;

    this.ws = null;
    this.dispatcher = null;
  }
}

/**
 * Starts a transfer stage on the remote and returns an object-mode stream fed by the
 * websocket messages belonging to that stage. Invoked with `this` bound to the provider.
 *
 * @param {string} stage - Stage name (`'entities'`, `'links'`, `'assets'`, `'configuration'`).
 * @returns {Promise<PassThrough>} Stream receiving every item of each matching message.
 * @throws {Error} The error returned by the start step, when starting fails.
 */
async function createStageReadStream(stage) {
  if (stage === 'assets') {
    // Drop totals from a previous run before asking for new ones
    _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals] = undefined;
  }

  const startResult = await _class_private_field_loose_base(this, _startStep)[_startStep](stage);
  if (startResult instanceof Error) {
    throw startResult;
  }

  const { id: processID, totals } = startResult;
  if (stage === 'assets' && totals && (totals.totalBytes != null || totals.totalCount != null)) {
    _class_private_field_loose_base(this, _cachedAssetsTotals)[_cachedAssetsTotals] = totals;
  }

  // Default object-mode HWM (~16 chunks). Do not await `drain` on manual `push` while `pipe()`
  // is attached — drain/`readableLength` races reliably deadlock after a few 1MiB asset frames.
  // Backpressure for pull assets is enforced by the `highWaterMark: 1` Writable in `createAssetsReadStream`.
  const stream = new PassThrough({ objectMode: true });

  const listener = async (raw) => {
    // This handler runs as a floating promise, so any failure (malformed JSON, a `null` payload,
    // a failed acknowledgement) must be routed to the stream. Otherwise it becomes an unhandled
    // rejection and the stream never ends or errors.
    try {
      const parsed = JSON.parse(raw.toString());

      // If not a message related to our transfer process, ignore it
      if (!parsed.uuid || parsed?.data?.type !== 'transfer' || parsed?.data?.id !== processID) {
        this.ws?.once('message', listener);
        return;
      }

      const { uuid, data: message } = parsed;
      const { ended, error, data } = message;

      if (error) {
        await _class_private_field_loose_base(this, _respond)[_respond](uuid);
        stream.destroy(error);
        return;
      }

      if (ended) {
        await _class_private_field_loose_base(this, _respond)[_respond](uuid);
        // The value returned by the end step (an Error or `null`) is intentionally not inspected here
        await _class_private_field_loose_base(this, _endStep)[_endStep](stage);
        stream.end();
        return;
      }

      for (const item of castArray(data)) {
        stream.push(item);
      }

      // Re-arm before acknowledging so the next message cannot be missed
      this.ws?.once('message', listener);
      await _class_private_field_loose_base(this, _respond)[_respond](uuid);
    } catch (err) {
      // Stop listening (the listener may already have been re-armed) and fail the stage stream
      this.ws?.removeListener('message', listener);
      stream.destroy(err instanceof Error ? err : new ProviderTransferError('Unexpected error'));
    }
  };

  this.ws?.once('message', listener);
  return stream;
}

/**
 * Reports an `info` diagnostic, if a diagnostics reporter was provided.
 *
 * @param {string} message - Text of the diagnostic.
 */
function reportInfo(message) {
  _class_private_field_loose_base(this, _diagnostics)[_diagnostics]?.report({
    details: {
      createdAt: new Date(),
      message,
      origin: 'remote-source-provider'
    },
    kind: 'info'
  });
}

/**
 * Reports a `warning` diagnostic, if a diagnostics reporter was provided.
 *
 * @param {string} message - Text of the diagnostic.
 */
function reportWarning(message) {
  _class_private_field_loose_base(this, _diagnostics)[_diagnostics]?.report({
    details: {
      createdAt: new Date(),
      message,
      origin: 'remote-source-provider'
    },
    kind: 'warning'
  });
}

/**
 * Dispatches the `start` action of a step. Failures are returned, not thrown.
 *
 * @param {string} step - Step name; `'assets'` uses the extended retry window.
 * @returns {Promise<*>} The dispatcher result, or an `Error` describing the failure.
 */
async function startStep(step) {
  try {
    return await this.dispatcher?.dispatchTransferStep({
      action: 'start',
      step
    }, step === 'assets' ? {
      retryOverrides: ASSETS_START_RETRY_OVERRIDES
    } : undefined);
  } catch (e) {
    if (e instanceof Error) {
      return e;
    }
    if (typeof e === 'string') {
      return new ProviderTransferError(e);
    }
    return new ProviderTransferError('Unexpected error');
  }
}

/**
 * Acknowledges a message by sending `{ uuid }` back over the websocket.
 *
 * @param {string} uuid - Identifier of the message being acknowledged.
 * @returns {Promise<void>} Settles from the `send` callback; rejects with its error.
 */
async function respond(uuid) {
  return new Promise((resolve, reject) => {
    // NOTE(review): when `this.ws` is nullish nothing is sent and this promise never settles.
    // Callers in this module only reach it from a websocket `message` listener.
    this.ws?.send(JSON.stringify({ uuid }), (e) => {
      if (e) {
        reject(e);
      } else {
        resolve();
      }
    });
  });
}

/**
 * Dispatches the `end` action of a step. Failures are returned, not thrown.
 *
 * @param {string} step - Step name.
 * @returns {Promise<Error|null>} `null` on success, otherwise an `Error` describing the failure.
 */
async function endStep(step) {
  try {
    await this.dispatcher?.dispatchTransferStep({
      action: 'end',
      step
    });
  } catch (e) {
    if (e instanceof Error) {
      return e;
    }
    if (typeof e === 'string') {
      return new ProviderTransferError(e);
    }
    return new ProviderTransferError('Unexpected error');
  }
  return null;
}

/**
 * Factory for the remote Strapi source provider.
 *
 * @param {object} options - Options forwarded to the `RemoteStrapiSourceProvider` constructor.
 * @returns {RemoteStrapiSourceProvider} A new provider instance.
 */
const createRemoteStrapiSourceProvider = (options) => {
  return new RemoteStrapiSourceProvider(options);
};

export { createRemoteStrapiSourceProvider };
//# sourceMappingURL=index.mjs.map