/**
 * rxdb - A local-first realtime NoSQL Database for JavaScript applications
 * https://rxdb.info/
 */
import {
firstValueFrom,
filter,
mergeMap
} from 'rxjs';
import { newRxError } from '../rx-error.ts';
import { getWrittenDocumentsFromBulkWriteResponse, stackCheckpoints } from '../rx-storage-helper.ts';
import type {
RxStorageInstanceReplicationState,
BulkWriteRow,
BulkWriteRowById,
RxStorageReplicationMeta,
RxDocumentData,
ById,
WithDeleted,
DocumentsWithCheckpoint,
WithDeletedAndAttachments,
RxError
} from '../types/index.d.ts';
import {
appendToArray,
createRevision,
ensureNotFalsy,
flatClone,
getDefaultRevision,
getHeightOfRevision,
now,
PROMISE_RESOLVE_VOID
} from '../plugins/utils/index.ts';
import {
getLastCheckpointDoc,
setCheckpoint
} from './checkpoint.ts';
import {
stripAttachmentsDataFromMetaWriteRows,
writeDocToDocState
} from './helper.ts';
import {
getAssumedMasterState,
getMetaWriteRow
} from './meta-instance.ts';
/**
 * Writes all documents from the master to the fork.
 * The downstream has two operation modes:
 * - Sync by iterating over the checkpoints via downstreamResyncOnce()
 * - Sync by listening to the changestream via downstreamProcessChanges()
 * We need both so that we can run an initial full sync
 * and still have fast, event-based syncing while the client is online.
 */
export async function startReplicationDownstream<RxDocType, CheckpointType = any>(
state: RxStorageInstanceReplicationState<RxDocType>
) {
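/**
 * If an initial downstream checkpoint was provided and no checkpoint
 * has been persisted yet, fast-forward the replication so that it
 * starts from that checkpoint instead of pulling everything
 * from the beginning.
 */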
if (
state.input.initialCheckpoint &&
state.input.initialCheckpoint.downstream
) {
const checkpointDoc = await getLastCheckpointDoc(state, 'down');
if (!checkpointDoc) {
await setCheckpoint(
state,
'down',
state.input.initialCheckpoint.downstream
);
}
}
const identifierHash = await state.input.hashFunction(state.input.identifier);
const replicationHandler = state.input.replicationHandler;
// Monotonically increasing counter, used to order incoming tasks and to detect outdated ones.
let timer = 0;
type Task = DocumentsWithCheckpoint<RxDocType, any> | 'RESYNC';
type TaskWithTime = {
time: number;
task: Task;
};
const openTasks: TaskWithTime[] = [];
function addNewTask(task: Task): void {
state.stats.down.addNewTask = state.stats.down.addNewTask + 1;
const taskWithTime = {
time: timer++,
task
};
openTasks.push(taskWithTime);
state.streamQueue.down = state.streamQueue.down
.then(() => {
const useTasks: Task[] = [];
while (openTasks.length > 0) {
state.events.active.down.next(true);
const innerTaskWithTime = ensureNotFalsy(openTasks.shift());
/**
* If the task came in before the last time we started the pull
* from the master, then we can drop the task.
*/
if (innerTaskWithTime.time < lastTimeMasterChangesRequested) {
continue;
}
if (innerTaskWithTime.task === 'RESYNC') {
/**
 * A RESYNC task is never processed together with
 * normal change-tasks. It only runs when no other
 * tasks have been collected in this batch;
 * otherwise it is dropped and the collected
 * change-tasks are processed instead.
 */
if (useTasks.length === 0) {
useTasks.push(innerTaskWithTime.task);
}
break;
}
useTasks.push(innerTaskWithTime.task);
}
if (useTasks.length === 0) {
return;
}
if (useTasks[0] === 'RESYNC') {
return downstreamResyncOnce();
} else {
return downstreamProcessChanges(useTasks);
}
}).then(() => {
state.events.active.down.next(false);
if (
!state.firstSyncDone.down.getValue() &&
!state.events.canceled.getValue()
) {
state.firstSyncDone.down.next(true);
}
});
}
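/**
 * Illustration of the batching above (hypothetical timeline, not executed):
 * if openTasks holds [changesA, changesB, 'RESYNC'], one queue run collects
 * changesA and changesB into useTasks and stops at the 'RESYNC', which is
 * dropped; a 'RESYNC' only runs (alone, via downstreamResyncOnce()) when it
 * is the first task collected. Tasks older than the last pull from the
 * master (see lastTimeMasterChangesRequested) are skipped entirely.
 */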
addNewTask('RESYNC');
/**
* If a write on the master happens, we have to trigger the downstream.
* Only do this if not canceled yet, otherwise firstValueFrom errors
* when running on a completed observable.
*/
if (!state.events.canceled.getValue()) {
const sub = replicationHandler
.masterChangeStream$
.pipe(
mergeMap(async (ev) => {
/**
* While a push is running, we have to delay all incoming
* events from the server to not mix up the replication state.
*/
await firstValueFrom(
state.events.active.up.pipe(filter(s => !s))
);
return ev;
})
)
.subscribe((task: Task) => {
state.stats.down.masterChangeStreamEmit = state.stats.down.masterChangeStreamEmit + 1;
addNewTask(task);
});
// unsubscribe when replication is canceled
firstValueFrom(
state.events.canceled.pipe(
filter(canceled => !!canceled)
)
).then(() => sub.unsubscribe());
}
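/**
 * The block above is a one-shot teardown: firstValueFrom() resolves on the
 * first truthy canceled value and then unsubscribes. The same pattern in
 * isolation (a sketch, assuming a BehaviorSubject<boolean> as the cancel
 * signal; source$ and handleEvent are placeholders):
 *
 * const sub = source$.subscribe(handleEvent);
 * firstValueFrom(canceled$.pipe(filter(c => !!c)))
 *     .then(() => sub.unsubscribe());
 *
 * A takeUntil() on the filtered signal would be a close alternative; the
 * explicit unsubscribe keeps the mergeMap()-based delaying of events untouched.
 */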
/**
 * For better performance, we start each write immediately
 * and await all open writes at the end.
 */
let lastTimeMasterChangesRequested: number = -1;
async function downstreamResyncOnce() {
state.stats.down.downstreamResyncOnce = state.stats.down.downstreamResyncOnce + 1;
if (state.events.canceled.getValue()) {
return;
}
state.checkpointQueue = state.checkpointQueue.then(() => getLastCheckpointDoc(state, 'down'));
let lastCheckpoint: CheckpointType = await state.checkpointQueue;
const promises: Promise<any>[] = [];
while (!state.events.canceled.getValue()) {
lastTimeMasterChangesRequested = timer++;
const downResult = await replicationHandler.masterChangesSince(
lastCheckpoint,
state.input.pullBatchSize
);
if (downResult.documents.length === 0) {
break;
}
lastCheckpoint = stackCheckpoints([lastCheckpoint, downResult.checkpoint]);
promises.push(
persistFromMaster(
downResult.documents,
lastCheckpoint
)
);
/**
 * We stop pulling when the returned documents
 * do not fill up the pullBatchSize, because then we
 * can assume that the remote has no more documents.
 */
if (downResult.documents.length < state.input.pullBatchSize) {
break;
}
}
await Promise.all(promises);
}
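/**
 * A minimal sketch of the remote side that downstreamResyncOnce() iterates
 * over. The endpoint and the checkpoint's updatedAt field are hypothetical,
 * not part of RxDB; this file only relies on the { documents, checkpoint }
 * return shape:
 *
 * const replicationHandler = {
 *     masterChangesSince: async (checkpoint, batchSize) => {
 *         const since = checkpoint ? checkpoint.updatedAt : 0;
 *         const response = await fetch('/pull?since=' + since + '&limit=' + batchSize);
 *         return await response.json(); // { documents: [...], checkpoint: {...} }
 *     }
 *     // masterChangeStream$ and masterWrite() omitted
 * };
 */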
function downstreamProcessChanges(tasks: Task[]) {
state.stats.down.downstreamProcessChanges = state.stats.down.downstreamProcessChanges + 1;
const docsOfAllTasks: WithDeleted<RxDocType>[] = [];
let lastCheckpoint: CheckpointType | undefined = null as any;
tasks.forEach(task => {
if (task === 'RESYNC') {
// must never happen: RESYNC tasks are processed alone via downstreamResyncOnce()
throw new Error('SNH');
}
appendToArray(docsOfAllTasks, task.documents);
lastCheckpoint = stackCheckpoints([lastCheckpoint, task.checkpoint]);
});
return persistFromMaster(
docsOfAllTasks,
ensureNotFalsy(lastCheckpoint)
);
}
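/**
 * Note on stackCheckpoints(): it merges the given checkpoints into one,
 * with later entries overruling earlier ones per field. Worked example
 * (hypothetical checkpoint shape):
 * stackCheckpoints([{ id: 'a', updatedAt: 1 }, { updatedAt: 2 }])
 * -> { id: 'a', updatedAt: 2 }
 */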
/**
 * It can happen that calls to masterChangesSince() or the changeStream()
 * are much faster than the documents can be persisted.
 * Therefore we merge all incoming downResults into the nonPersistedFromMaster
 * object and process them together when possible.
 * This bundles up single writes and improves performance
 * by processing the documents in bulks.
 */
let persistenceQueue = PROMISE_RESOLVE_VOID;
const nonPersistedFromMaster: {
checkpoint?: CheckpointType;
docs: ById<WithDeleted<RxDocType>>;
} = {
docs: {}
};
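/**
 * Worked example of the coalescing (hypothetical): if persistFromMaster()
 * is called twice, with [docA] and then [docA', docB], before the queue
 * runs, nonPersistedFromMaster ends up holding { docA', docB } plus the
 * latest checkpoint, and a single queue run persists both documents in
 * one bulkWrite() instead of two.
 */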
function persistFromMaster(
docs: WithDeleted<RxDocType>[],
checkpoint: CheckpointType
): Promise<void> {
const primaryPath = state.primaryPath;
state.stats.down.persistFromMaster = state.stats.down.persistFromMaster + 1;
/**
 * Add the new docs to the list of not-yet-persisted docs.
 */
docs.forEach(docData => {
const docId: string = (docData as any)[primaryPath];
nonPersistedFromMaster.docs[docId] = docData;
});
nonPersistedFromMaster.checkpoint = checkpoint;
/**
* Run in the queue
* with all open documents from nonPersistedFromMaster.
*/
persistenceQueue = persistenceQueue.then(() => {
const downDocsById: ById<WithDeletedAndAttachments<RxDocType>> = nonPersistedFromMaster.docs;
nonPersistedFromMaster.docs = {};
const useCheckpoint = nonPersistedFromMaster.checkpoint;
const docIds = Object.keys(downDocsById);
if (
state.events.canceled.getValue() ||
docIds.length === 0
) {
return PROMISE_RESOLVE_VOID;
}
const writeRowsToFork: BulkWriteRow<RxDocType>[] = [];
const writeRowsToForkById: ById<BulkWriteRow<RxDocType>> = {};
const writeRowsToMeta: BulkWriteRowById<RxStorageReplicationMeta<RxDocType, CheckpointType>> = {};
const useMetaWriteRows: BulkWriteRow<RxStorageReplicationMeta<RxDocType, CheckpointType>>[] = [];
return Promise.all([
state.input.forkInstance.findDocumentsById(docIds, true),
getAssumedMasterState(
state,
docIds
)
]).then(([
currentForkStateList,
assumedMasterState
]) => {
const currentForkState = new Map<string, RxDocumentData<RxDocType>>();
currentForkStateList.forEach(doc => currentForkState.set((doc as any)[primaryPath], doc));
return Promise.all(
docIds.map(async (docId) => {
const forkStateFullDoc: RxDocumentData<RxDocType> | undefined = currentForkState.get(docId);
const forkStateDocData: WithDeletedAndAttachments<RxDocType> | undefined = forkStateFullDoc
? writeDocToDocState(forkStateFullDoc, state.hasAttachments, false)
: undefined
;
const masterState = downDocsById[docId];
const assumedMaster = assumedMasterState[docId];
if (
assumedMaster &&
forkStateFullDoc &&
assumedMaster.metaDocument.isResolvedConflict === forkStateFullDoc._rev
) {
/**
 * The current fork state represents a resolved conflict
 * that first must be sent to the master in the upstream.
 * All conflicts are resolved by the upstream.
 */
await state.streamQueue.up;
}
let isAssumedMasterEqualToForkState = (!assumedMaster || !forkStateDocData)
? false
: state.input.conflictHandler.isEqual(
assumedMaster.docData,
forkStateDocData,
'downstream-check-if-equal-0'
);
if (
!isAssumedMasterEqualToForkState &&
(
assumedMaster &&
(assumedMaster.docData as any)._rev &&
forkStateFullDoc &&
forkStateFullDoc._meta[state.input.identifier] &&
getHeightOfRevision(forkStateFullDoc._rev) === forkStateFullDoc._meta[state.input.identifier]
)
) {
isAssumedMasterEqualToForkState = true;
}
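/**
 * Worked example of the check above (hypothetical values): the downstream
 * previously wrote the master state with a _rev of height 3 and stored
 * _meta[identifier] = 3 (see the nextRevisionHeight handling below).
 * If the fork document still has a _rev like '3-abc', no local write
 * happened in between, so the fork state can be treated as equal to the
 * assumed master even when the conflict handler could not prove it.
 */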
if (
(
forkStateFullDoc &&
assumedMaster &&
isAssumedMasterEqualToForkState === false
) ||
(
forkStateFullDoc && !assumedMaster
)
) {
/**
 * We have a local write on the fork
 * that has not been replicated to the master yet.
 * This means we ignore the downstream for this document
 * because the upstream will resolve the conflict first.
 */
return PROMISE_RESOLVE_VOID;
}
const areStatesExactlyEqual = !forkStateDocData
? false
: state.input.conflictHandler.isEqual(
masterState,
forkStateDocData,
'downstream-check-if-equal-1'
);
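/**
 * conflictHandler.isEqual() is a synchronous comparison supplied by the
 * user (or by RxDB's default handler). A minimal sketch, assuming some
 * structural deepEqual() helper:
 *
 * const conflictHandler = {
 *     isEqual: (a, b, context) => deepEqual(a, b),
 *     resolve: (input) => input.realMasterState
 * };
 */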
if (
forkStateDocData &&
areStatesExactlyEqual
) {
/**
 * Document states are exactly equal.
 * This can happen when the replication is shut down
 * unexpectedly, for example when the user goes offline.
 *
 * Only when the assumedMaster differs from the forkState
 * do we have to patch the document in the meta instance.
 */
if (
!assumedMaster ||
isAssumedMasterEqualToForkState === false
) {
useMetaWriteRows.push(
await getMetaWriteRow(
state,
forkStateDocData,
assumedMaster ? assumedMaster.metaDocument : undefined
)
);
}
return PROMISE_RESOLVE_VOID;
}
/**
* All other master states need to be written to the forkInstance
* and metaInstance.
*/
const newForkState = Object.assign(
{},
masterState,
forkStateFullDoc ? {
_meta: flatClone(forkStateFullDoc._meta),
_attachments: state.hasAttachments && masterState._attachments ? masterState._attachments : {},
_rev: getDefaultRevision()
} : {
_meta: {
lwt: now()
},
_rev: getDefaultRevision(),
_attachments: state.hasAttachments && masterState._attachments ? masterState._attachments : {}
}
);
/**
 * If the remote works with revisions,
 * we store the height of the next fork-state revision
 * inside of the document's meta data.
 * By doing so we can filter it out in the upstream
 * and detect whether the document is equal to the master state or not.
 * This is used for example in the CouchDB replication plugin.
 */
if ((masterState as any)._rev) {
const nextRevisionHeight = !forkStateFullDoc ? 1 : getHeightOfRevision(forkStateFullDoc._rev) + 1;
newForkState._meta[state.input.identifier] = nextRevisionHeight;
if (state.input.keepMeta) {
newForkState._rev = (masterState as any)._rev;
}
}
if (
state.input.keepMeta &&
(masterState as any)._meta
) {
newForkState._meta = (masterState as any)._meta;
}
const forkWriteRow = {
previous: forkStateFullDoc,
document: newForkState
};
forkWriteRow.document._rev = forkWriteRow.document._rev ? forkWriteRow.document._rev : createRevision(
identifierHash,
forkWriteRow.previous
);
writeRowsToFork.push(forkWriteRow);
writeRowsToForkById[docId] = forkWriteRow;
writeRowsToMeta[docId] = await getMetaWriteRow(
state,
masterState,
assumedMaster ? assumedMaster.metaDocument : undefined
);
})
);
}).then(async () => {
if (writeRowsToFork.length > 0) {
return state.input.forkInstance.bulkWrite(
writeRowsToFork,
await state.downstreamBulkWriteFlag
).then((forkWriteResult) => {
const success = getWrittenDocumentsFromBulkWriteResponse(
state.primaryPath,
writeRowsToFork,
forkWriteResult
);
success.forEach(doc => {
const docId = (doc as any)[primaryPath];
state.events.processed.down.next(writeRowsToForkById[docId]);
useMetaWriteRows.push(writeRowsToMeta[docId]);
});
let mustThrow: RxError | undefined;
forkWriteResult.error.forEach(error => {
/**
 * We do not have to care about downstream conflict errors here
 * because a conflict is resolved locally and results in another write.
 */
if (error.status === 409) {
return;
}
// other non-conflict errors must be handled
const throwMe = newRxError('RC_PULL', {
writeError: error
});
state.events.error.next(throwMe);
mustThrow = throwMe;
});
if (mustThrow) {
throw mustThrow;
}
});
}
}).then(() => {
if (useMetaWriteRows.length > 0) {
return state.input.metaInstance.bulkWrite(
stripAttachmentsDataFromMetaWriteRows(state, useMetaWriteRows),
'replication-down-write-meta'
).then(metaWriteResult => {
metaWriteResult.error
.forEach(writeError => {
state.events.error.next(newRxError('RC_PULL', {
id: writeError.documentId,
writeError
}));
});
});
}
}).then(() => {
/**
* For better performance we do not await checkpoint writes,
* but to ensure order on parallel checkpoint writes,
* we have to use a queue.
*/
setCheckpoint(
state,
'down',
useCheckpoint
);
});
}).catch(unhandledError => state.events.error.next(unhandledError));
return persistenceQueue;
}
}
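/**
 * Usage sketch: this function is not called directly by applications.
 * The replication protocol wires it together with the upstream, roughly
 * like this (simplified; see replicateRxStorageInstance() in the
 * replication-protocol index):
 *
 * const state = replicateRxStorageInstance({ ... });
 * // internally:
 * // startReplicationDownstream(state);
 * // startReplicationUpstream(state);
 */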