rxdb
Version:
A local-first realtime NoSQL Database for JavaScript applications - https://rxdb.info/
565 lines (512 loc) • 21.9 kB
text/typescript
import { firstValueFrom, filter } from 'rxjs';
import {
getChangedDocumentsSince,
getWrittenDocumentsFromBulkWriteResponse,
stackCheckpoints
} from '../rx-storage-helper.ts';
import type {
BulkWriteRow,
BulkWriteRowById,
ById,
EventBulk,
RxDocumentData,
RxError,
RxReplicationWriteToMasterRow,
RxStorageChangeEvent,
RxStorageInstanceReplicationState,
RxStorageReplicationMeta,
WithDeleted
} from '../types/index.d.ts';
import {
appendToArray,
batchArray,
clone,
ensureNotFalsy,
getHeightOfRevision,
PROMISE_RESOLVE_FALSE
} from '../plugins/utils/index.ts';
import {
getLastCheckpointDoc,
setCheckpoint
} from './checkpoint.ts';
import {
resolveConflictError
} from './conflicts.ts';
import {
stripAttachmentsDataFromMetaWriteRows,
writeDocToDocState
} from './helper.ts';
import {
getAssumedMasterState,
getMetaWriteRow
} from './meta-instance.ts';
import { fillWriteDataForAttachmentsChange } from '../plugins/attachments/index.ts';
import { newRxError } from '../rx-error.ts';
/**
* Writes all document changes from the fork to the master.
* The upstream runs on two modes:
* - For initial replication, a checkpoint-iteration is used
* - For ongoing local writes, we just subscribe to the changeStream of the fork.
* In contrast to the master, the fork can be assumed to never lose connection,
* so we do not have to prepare for missed out events.
*
* The returned promise only covers the optional storing of the initial
* upstream checkpoint. The replication work itself is chained onto
* state.streamQueue.up and driven by the subscriptions created here.
*
* @param state the replication state whose fork instance is pushed to the master.
*/
export async function startReplicationUpstream<RxDocType, CheckpointType>(
state: RxStorageInstanceReplicationState<RxDocType>
) {
/**
* If the input provides an initial upstream checkpoint
* and no 'up' checkpoint document is stored yet,
* store the given one so that the initial sync starts from it.
*/
if (
state.input.initialCheckpoint &&
state.input.initialCheckpoint.upstream
) {
const checkpointDoc = await getLastCheckpointDoc(state, 'up');
if (!checkpointDoc) {
await setCheckpoint(
state,
'up',
state.input.initialCheckpoint.upstream
);
}
}
const replicationHandler = state.input.replicationHandler;
/**
* Chain the initial sync onto the upstream queue and
* afterwards process the tasks that have been collected in the meantime.
* The callback runs asynchronously, so the variables declared
* below are initialized before it is invoked.
*/
state.streamQueue.up = state.streamQueue.up.then(() => {
return upstreamInitialSync().then(() => {
return processTasks();
});
});
/**
* Logical clock used to detect in which order tasks came in
* relative to the initial-sync iterations.
* It is incremented for each queued task and for each initial-sync loop iteration.
*/
let timer = 0;
/**
* Value of the timer at the start of the latest initial-sync iteration.
* processTasks() skips tasks that have a lower time.
*/
let initialSyncStartTime = -1;
type Task = EventBulk<RxStorageChangeEvent<RxDocType>, any> | 'RESYNC';
type TaskWithTime = {
task: Task;
time: number;
};
// Tasks that have been queued by the subscriptions below and are drained by processTasks().
const openTasks: TaskWithTime[] = [];
// Promise chain on which persistToMaster() appends its runs so that they execute one after another.
let persistenceQueue: Promise<boolean> = PROMISE_RESOLVE_FALSE;
/**
* Documents (by id) and the latest checkpoint that have been handed
* to persistToMaster() but have not been picked up by a queued run yet.
*/
const nonPersistedFromMaster: {
checkpoint?: CheckpointType;
docs: ById<RxDocumentData<RxDocType>>;
} = {
docs: {}
};
/**
* Ongoing mode: each event bulk of the fork becomes an open task.
* Events that arrive while the replication is paused are dropped here.
*/
const sub = state.input.forkInstance.changeStream()
.subscribe((eventBulk) => {
if (state.events.paused.getValue()) {
return;
}
state.stats.up.forkChangeStreamEmit = state.stats.up.forkChangeStreamEmit + 1;
openTasks.push({
task: eventBulk,
time: timer++
});
if (!state.events.active.up.getValue()) {
state.events.active.up.next(true);
}
// If configured, wait for waitBeforePersist() before the tasks are processed.
if (state.input.waitBeforePersist) {
return state.input.waitBeforePersist()
.then(() => processTasks());
} else {
return processTasks();
}
});
/**
* A 'RESYNC' emitted by the master change stream is queued as its own task,
* processTasks() reacts to it by running the initial sync again.
*/
const subResync = replicationHandler
.masterChangeStream$
.pipe(
filter(ev => ev === 'RESYNC')
)
.subscribe(() => {
openTasks.push({
task: 'RESYNC',
time: timer++
});
processTasks();
});
// unsubscribe when replication is canceled
firstValueFrom(
state.events.canceled.pipe(
filter(canceled => !!canceled)
)
).then(() => {
sub.unsubscribe();
subResync.unsubscribe();
});
/**
 * Runs the checkpoint-iteration mode of the upstream:
 * starting at the last stored 'up' checkpoint, it reads batches of changed
 * documents from the fork and hands them to persistToMaster()
 * until no more changed documents are found or the replication is canceled.
 *
 * If any batch reported conflicts, the whole initial sync is run again.
 * Otherwise state.firstSyncDone.up is set to true (once, and not when canceled).
 */
async function upstreamInitialSync() {
    state.stats.up.upstreamInitialSync = state.stats.up.upstreamInitialSync + 1;
    if (state.events.canceled.getValue()) {
        return;
    }

    state.checkpointQueue = state.checkpointQueue.then(() => getLastCheckpointDoc(state, 'up'));
    let lastCheckpoint: CheckpointType = await state.checkpointQueue;

    // Batches that have been handed to persistToMaster() and are not settled yet.
    const promises: Set<Promise<boolean>> = new Set();
    /**
     * Conflicts must be remembered at the moment a batch settles.
     * Settled batches are removed from the set above, so looking only
     * at the batches that are still pending after the loop
     * would miss the conflicts of all earlier batches.
     */
    let hadConflicts = false;

    while (!state.events.canceled.getValue()) {
        initialSyncStartTime = timer++;

        /**
         * Throttle the calls to
         * forkInstance.getChangedDocumentsSince() so that
         * if the pushing to the remote is slower compared to the
         * pulling out of forkInstance, we do not block the UI too much
         * and have a big memory spike with all forkInstance documents.
         */
        if (promises.size > 3) {
            await Promise.race(Array.from(promises));
        }

        const upResult = await getChangedDocumentsSince(
            state.input.forkInstance,
            state.input.pushBatchSize,
            lastCheckpoint
        );
        if (upResult.documents.length === 0) {
            break;
        }

        lastCheckpoint = stackCheckpoints([lastCheckpoint, upResult.checkpoint]);

        const promise = persistToMaster(
            upResult.documents,
            ensureNotFalsy(lastCheckpoint)
        );
        promises.add(promise);
        promise.then(
            (batchHadConflicts) => {
                if (batchHadConflicts) {
                    hadConflicts = true;
                }
                promises.delete(promise);
            },
            () => {
                /**
                 * Keep a rejected batch in the set so that the
                 * Promise.race()/Promise.all() calls of this function surface the error.
                 * This handler only prevents an unhandled rejection of the derived promise.
                 */
            }
        );
    }

    /**
     * Wait for the batches that are still running
     * and also take their results into account.
     */
    const remainingResults = await Promise.all(promises);
    if (remainingResults.some(result => !!result)) {
        hadConflicts = true;
    }

    /**
     * If we had conflicts during the initial sync,
     * it means that we likely have new writes to the fork
     * and so we have to run the initial sync again to upstream these new writes.
     */
    if (hadConflicts) {
        await upstreamInitialSync();
    } else if (
        !state.firstSyncDone.up.getValue() &&
        !state.events.canceled.getValue()
    ) {
        state.firstSyncDone.up.next(true);
    }
}
/**
 * Drains all currently open tasks and pushes them to the master in one go.
 * The work is appended to state.streamQueue.up, nothing is returned to the caller.
 * When there is nothing to do (canceled or no open tasks),
 * the upstream is only marked as inactive.
 */
function processTasks() {
    const nothingToDo = state.events.canceled.getValue() || openTasks.length === 0;
    if (nothingToDo) {
        state.events.active.up.next(false);
        return;
    }

    state.stats.up.processTasks += 1;
    state.events.active.up.next(true);

    state.streamQueue.up = state.streamQueue.up.then(async () => {
        // Collect the documents and the stacked checkpoint of all open tasks.
        const docsToPush: RxDocumentData<RxDocType>[] = [];
        let mergedCheckpoint: CheckpointType | undefined;

        for (
            let entry = openTasks.shift();
            entry !== undefined;
            entry = openTasks.shift()
        ) {
            const { task, time } = entry;

            /**
             * Tasks that were queued before the latest initial-sync iteration
             * started are skipped, the initial sync has already handled
             * the documents of these tasks.
             */
            if (time < initialSyncStartTime) {
                continue;
            }

            // A RESYNC aborts this run and starts the initial sync again.
            if (task === 'RESYNC') {
                state.events.active.up.next(false);
                await upstreamInitialSync();
                return;
            }

            /**
             * Documents written by the downstream are known to be replicated
             * already, so they are not pushed again.
             * Their checkpoint is still stacked because persistToMaster()
             * must be called later to store the correct checkpoint.
             */
            const originContext = task.context;
            const downstreamFlag = await state.downstreamBulkWriteFlag;
            if (originContext !== downstreamFlag) {
                appendToArray(
                    docsToPush,
                    task.events.map(changeEvent => changeEvent.documentData as any)
                );
            }
            mergedCheckpoint = stackCheckpoints([mergedCheckpoint, task.checkpoint]);
        }

        await persistToMaster(
            docsToPush,
            mergedCheckpoint as any
        );

        // New tasks might have come in while persistToMaster() was running.
        if (openTasks.length > 0) {
            return processTasks();
        }
        state.events.active.up.next(false);
    });
}
/**
* Queues the given fork documents for being pushed to the master.
* The documents are first collected in nonPersistedFromMaster and then
* processed by a run that is appended to the persistenceQueue,
* so a single run can contain the documents of several calls.
*
* @param docs fork document states that might have to be pushed.
* @param checkpoint the checkpoint to store after the run.
* @returns the persistenceQueue. The appended run resolves with
* true if conflict resolutions have been written to the fork,
* false if not. It also resolves with false when the replication
* was canceled or when an error was thrown. In the latter case
* the error is emitted to state.events.error.
*/
function persistToMaster(
docs: RxDocumentData<RxDocType>[],
checkpoint: CheckpointType
): Promise<boolean> {
state.stats.up.persistToMaster = state.stats.up.persistToMaster + 1;
/**
* Add the new docs to the non-persistent list.
* A later state of the same document id overwrites the earlier one.
*/
docs.forEach(docData => {
const docId: string = (docData as any)[state.primaryPath];
nonPersistedFromMaster.docs[docId] = docData;
});
nonPersistedFromMaster.checkpoint = checkpoint;
persistenceQueue = persistenceQueue.then(async () => {
if (state.events.canceled.getValue()) {
return false;
}
/**
* Take over everything that has been collected so far
* and reset the list so that calls which come in during this run
* are collected for the next run.
*/
const upDocsById: ById<RxDocumentData<RxDocType>> = nonPersistedFromMaster.docs;
nonPersistedFromMaster.docs = {};
const useCheckpoint = nonPersistedFromMaster.checkpoint;
const docIds = Object.keys(upDocsById);
/**
* Even if we do not have anything to push,
* we still have to store the up-checkpoint.
* This ensures that when many documents have been pulled
* from the remote (that do not have to be pushed again),
* we continue at the correct position and do not have to load
* these documents from the storage again when the replication is restarted.
*/
function rememberCheckpointBeforeReturn() {
return setCheckpoint(
state,
'up',
useCheckpoint
);
};
if (docIds.length === 0) {
rememberCheckpointBeforeReturn();
return false;
}
// The state that we assume the master has for these documents, see getAssumedMasterState().
const assumedMasterState = await getAssumedMasterState(
state,
docIds
);
// Rows that must be sent to the master, and their ids in insertion order.
const writeRowsToMaster: ById<RxReplicationWriteToMasterRow<RxDocType>> = {};
const writeRowsToMasterIds: string[] = [];
// Meta rows that are written for the documents the master accepted.
const writeRowsToMeta: BulkWriteRowById<RxStorageReplicationMeta<RxDocType, any>> = {};
// Full fork states by id, used as 'previous' when a conflict resolution is written to the fork.
const forkStateById: ById<RxDocumentData<RxDocType>> = {};
/**
* Decide for each document whether it must be pushed
* and prepare the write rows for the ones that must.
*/
await Promise.all(
docIds.map(async (docId) => {
const fullDocData: RxDocumentData<RxDocType> = upDocsById[docId];
forkStateById[docId] = fullDocData;
const docData: WithDeleted<RxDocType> = writeDocToDocState(fullDocData, state.hasAttachments, !!state.input.keepMeta);
const assumedMasterDoc = assumedMasterState[docId];
/**
* If the master state is equal to the
* fork state, we can assume that the document state is already
* replicated.
*/
if (
(
assumedMasterDoc &&
// if the isResolvedConflict is correct, we do not have to compare the documents.
assumedMasterDoc.metaDocument.isResolvedConflict !== fullDocData._rev
&&
(
state.input.conflictHandler.isEqual(
assumedMasterDoc.docData,
docData,
'upstream-check-if-equal'
)
)
)
||
/**
* If the master works with _rev fields,
* we use that to check if our current doc state
* is different from the assumedMasterDoc.
*/
(
assumedMasterDoc &&
(assumedMasterDoc.docData as any)._rev &&
getHeightOfRevision(fullDocData._rev) === fullDocData._meta[state.input.identifier]
)
) {
return;
}
writeRowsToMasterIds.push(docId);
writeRowsToMaster[docId] = {
assumedMasterState: assumedMasterDoc ? assumedMasterDoc.docData : undefined,
newDocumentState: docData
};
writeRowsToMeta[docId] = await getMetaWriteRow(
state,
docData,
assumedMasterDoc ? assumedMasterDoc.metaDocument : undefined
);
})
);
// Nothing has to be pushed, only the checkpoint must be stored.
if (writeRowsToMasterIds.length === 0) {
rememberCheckpointBeforeReturn();
return false;
}
const writeRowsArray = Object.values(writeRowsToMaster);
// Ids and documents that the master returned from masterWrite(), these are handled as conflicts.
const conflictIds: Set<string> = new Set();
const conflictsById: ById<WithDeleted<RxDocType>> = {};
/**
* To always respect the push.batchSize,
* we have to split the write rows into batches
* to ensure that replicationHandler.masterWrite() is never
* called with more documents than what the batchSize limits.
* The batches are written in parallel.
*/
const writeBatches = batchArray(writeRowsArray, state.input.pushBatchSize);
await Promise.all(
writeBatches.map(async (writeBatch) => {
// enhance docs with attachments
if (state.hasAttachments) {
await Promise.all(
writeBatch.map(async (row) => {
row.newDocumentState = await fillWriteDataForAttachmentsChange(
state.primaryPath,
state.input.forkInstance,
clone(row.newDocumentState),
row.assumedMasterState
);
})
);
}
const masterWriteResult = await replicationHandler.masterWrite(writeBatch);
masterWriteResult.forEach(conflictDoc => {
const id = (conflictDoc as any)[state.primaryPath];
conflictIds.add(id);
conflictsById[id] = conflictDoc;
});
})
);
/**
* All rows without a conflict are emitted as processed
* and their meta rows are collected for the meta write below.
*/
const useWriteRowsToMeta: BulkWriteRow<RxStorageReplicationMeta<RxDocType, any>>[] = [];
writeRowsToMasterIds.forEach(docId => {
if (!conflictIds.has(docId)) {
state.events.processed.up.next(writeRowsToMaster[docId]);
useWriteRowsToMeta.push(writeRowsToMeta[docId]);
}
});
// Do not write to the meta instance when the replication was canceled during the master write.
if (state.events.canceled.getValue()) {
return false;
}
if (useWriteRowsToMeta.length > 0) {
await state.input.metaInstance.bulkWrite(
stripAttachmentsDataFromMetaWriteRows(state, useWriteRowsToMeta),
'replication-up-write-meta'
);
// TODO what happens when we have conflicts here?
}
/**
* Resolve conflicts by writing a new document
* state to the fork instance and the 'real' master state
* to the meta instance.
* Non-409 errors will be detected by resolveConflictError()
*/
let hadConflictWrites = false;
if (conflictIds.size > 0) {
state.stats.up.persistToMasterHadConflicts = state.stats.up.persistToMasterHadConflicts + 1;
const conflictWriteFork: BulkWriteRow<RxDocType>[] = [];
const conflictWriteMeta: BulkWriteRowById<RxStorageReplicationMeta<RxDocType, any>> = {};
await Promise.all(
Object
.entries(conflictsById)
.map(([docId, realMasterState]) => {
const writeToMasterRow = writeRowsToMaster[docId];
const input = {
newDocumentState: writeToMasterRow.newDocumentState,
assumedMasterState: writeToMasterRow.assumedMasterState,
realMasterState
};
return resolveConflictError(
state,
input,
forkStateById[docId]
).then(async (resolved) => {
// Only when a resolved state is returned, a fork write and a meta row are prepared.
if (resolved) {
state.events.resolvedConflicts.next({
input,
output: resolved
});
conflictWriteFork.push({
previous: forkStateById[docId],
document: resolved
});
const assumedMasterDoc = assumedMasterState[docId];
conflictWriteMeta[docId] = await getMetaWriteRow(
state,
ensureNotFalsy(realMasterState),
assumedMasterDoc ? assumedMasterDoc.metaDocument : undefined,
resolved._rev
);
}
});
})
);
if (conflictWriteFork.length > 0) {
hadConflictWrites = true;
state.stats.up.persistToMasterConflictWrites = state.stats.up.persistToMasterConflictWrites + 1;
const forkWriteResult = await state.input.forkInstance.bulkWrite(
conflictWriteFork,
'replication-up-write-conflict'
);
// The last non-conflict write error, it is thrown after all errors have been emitted.
let mustThrow: RxError | undefined;
forkWriteResult.error.forEach(error => {
/**
* Conflict-Errors in the forkWriteResult must not be handled
* because they have been caused by a write to the forkInstance
* in between which will anyway trigger a new upstream cycle
* that will then resolve the conflict again.
*/
if (error.status === 409) {
return;
}
// other non-conflict errors must be handled
const throwMe = newRxError('RC_PUSH', {
writeError: error
});
state.events.error.next(throwMe);
mustThrow = throwMe;
});
if (mustThrow) {
throw mustThrow;
}
/**
* Only for the resolutions that have been written to the fork
* successfully, the meta rows are written.
*/
const useMetaWrites: BulkWriteRow<RxStorageReplicationMeta<RxDocType, any>>[] = [];
const success = getWrittenDocumentsFromBulkWriteResponse(
state.primaryPath,
conflictWriteFork,
forkWriteResult
);
success
.forEach(docData => {
const docId = (docData as any)[state.primaryPath];
useMetaWrites.push(
conflictWriteMeta[docId]
);
});
if (useMetaWrites.length > 0) {
await state.input.metaInstance.bulkWrite(
stripAttachmentsDataFromMetaWriteRows(state, useMetaWrites),
'replication-up-write-conflict-meta'
);
}
// TODO what to do with conflicts while writing to the metaInstance?
}
}
/**
* For better performance we do not await checkpoint writes,
* but to ensure order on parallel checkpoint writes,
* we have to use a queue.
*/
rememberCheckpointBeforeReturn();
return hadConflictWrites;
}).catch(unhandledError => {
// Errors do not break the queue: they are emitted and the run counts as 'no conflicts'.
state.events.error.next(unhandledError);
return false;
});
return persistenceQueue;
}
}