UNPKG

@getanthill/datastore

Version:

Event-Sourced Datastore

611 lines (531 loc) 14.8 kB
import type { JSONSchemaType } from 'ajv';
import type { Datastore } from '.';
import type { Any, AnyObject } from '../typings';

import chunk from 'lodash/chunk';
import { cloneDeep, isEqual, pick } from 'lodash';

/**
 * One query taking part in a multi-datastore walk.
 */
export interface MultiQuery {
  /** Key used to look the datastore up in the `datastores` map. */
  datastore: string;
  model: string;
  source: 'events' | 'entities';
  query: object;
  // ---
  headers?: any;
  raw?: any;
  /**
   * Written by `fetchResultsForQuery` from the `correlation-field` response
   * header; read afterwards to extract the correlation ID of each result.
   */
  correlationField?: string;
}

/**
 * Mutable walking state kept for one query (one entry per query index).
 */
export interface Iteration {
  query_index: number;
  page: number;
  version: number;
  max_version: number;
  is_exhausted: boolean;
  cursor_last_id: string;
  cursor_last_correlation_id: string;
  /** Results fetched for this query and not yet handed to the handler. */
  results: AnyObject[];
}

export const ERRORS = {
  INCOMPATIBLE_MULTIPLE_HANDLE_OPTIONS:
    'Only one "handle in" option must be defined',
  INCOMPATIBLE_MUTATION_ON_EVENTS:
    'Options are incompatible: mutation on events source is not supported',
};

/** A fetched result decorated with the data needed to merge-sort it. */
interface SortableResult {
  result: AnyObject;
  created_at: string;
  updated_at: string;
  queryIndex: number;
  resultIndex: number;
}

/**
 * Builds a JSON schema that matches exactly the given value.
 *
 * - `null` maps to a nullable `null` type;
 * - numbers, strings and booleans map to a single-value `enum`;
 * - `Date` instances map to a string `enum` holding their ISO representation;
 * - arrays map to a tuple-like `items` list, one schema per element;
 * - any other value is treated as an object: each property gets its own
 *   schema and every non-null property is listed in `required`.
 *
 * Properties whose value is `undefined` are skipped instead of crashing in
 * `Object.keys(undefined)`.
 *
 * @param obj Value to describe
 * @returns The JSON schema describing `obj`
 */
export function objToJsonSchema(obj: Any): JSONSchemaType<Any> {
  if (obj === null) {
    return {
      type: 'null',
      nullable: true,
    };
  }

  if (typeof obj === 'number') {
    return {
      type: 'number',
      enum: [obj],
    };
  }

  if (typeof obj === 'string') {
    return {
      type: 'string',
      enum: [obj],
    };
  }

  if (obj instanceof Date) {
    return {
      type: 'string',
      enum: [obj.toISOString()],
    };
  }

  if (typeof obj === 'boolean') {
    return {
      type: 'boolean',
      enum: [obj],
    };
  }

  if (Array.isArray(obj)) {
    // @ts-ignore
    return {
      type: 'array',
      items: obj.map((item) => objToJsonSchema(item)),
    };
  }

  // Key order (type, required, properties) is kept stable on purpose so that
  // serialized schemas do not change.
  const schema: {
    type: string;
    required?: string[];
    properties: { [key: string]: any };
  } = {
    type: 'object',
    required: [],
    properties: {},
  };

  for (const key of Object.keys(obj)) {
    const value = obj[key];

    if (value === undefined) {
      continue;
    }

    schema.properties[key] = objToJsonSchema(value);

    if (value !== null) {
      schema.required?.push(key);
    }
  }

  if (schema.required?.length === 0) {
    delete schema.required;
  }

  return schema as JSONSchemaType<Any>;
}

/**
 * Default comparator of the multi walk: orders by `updated_at` (falling back
 * to `created_at`), then by `version` (missing versions count as `0`).
 *
 * Dates are compared with `String.prototype.localeCompare`, so both operands
 * must expose a string date — presumably ISO-8601; confirm against callers.
 */
export function defaultWalkMultiSortHandler(a: any, b: any) {
  const aComparisonDate = a.updated_at ?? a.created_at;
  const bComparisonDate = b.updated_at ?? b.created_at;

  const dateComparisonValue = aComparisonDate.localeCompare(bComparisonDate);

  return dateComparisonValue === 0
    ? (a.version ?? 0) - (b.version ?? 0)
    : dateComparisonValue;
}

/**
 * Sorts `results` IN PLACE with the given comparator and returns the same
 * array. Callers in this module rely on the in-place mutation.
 */
export function sortResults(
  results: AnyObject[],
  sortHandler: (a: any, b: any) => any = defaultWalkMultiSortHandler,
): AnyObject[] {
  return results.sort(sortHandler);
}

/**
 * Resolves, for every query, the value returned by `minEventsVersion` on its
 * datastore. A query whose datastore is not registered yields `0`.
 */
export async function getMinVersions(
  datastores: Map<string, Datastore>,
  queries: MultiQuery[],
): Promise<number[]> {
  return Promise.all(
    queries.map(
      (q) =>
        datastores
          .get(q.datastore)
          ?.minEventsVersion(q.model, q.query, q.headers) ?? 0,
    ),
  );
}

/**
 * Resolves, for every query, the value returned by `maxEventsVersion` on its
 * datastore. A query whose datastore is not registered yields `-1`.
 */
export async function getMaxVersions(
  datastores: Map<string, Datastore>,
  queries: MultiQuery[],
): Promise<number[]> {
  return Promise.all(
    queries.map(
      (q) =>
        datastores
          .get(q.datastore)
          ?.maxEventsVersion(q.model, q.query, q.headers) ?? -1,
    ),
  );
}

/**
 * Fetches the next page of one query through `walkNext` and updates the
 * query iteration state accordingly.
 *
 * Side effects:
 * - `query.correlationField` is overwritten with the `correlation-field`
 *   response header;
 * - fetched results are appended to `queryIteration.results`;
 * - page, cursors, version and `is_exhausted` of `queryIteration` are updated.
 *
 * `2 * pageSize` items are requested per call, while a page is considered
 * short as soon as it holds fewer than `pageSize` items.
 *
 * @param datastores Registered datastores, keyed by name
 * @param query Query to fetch (mutated, see above)
 * @param pageSize Page size of the walk
 * @param queryIteration Iteration state of the query (mutated, see above)
 * @param opts.version_ordered Walk version after version instead of stopping
 *   at the first short page
 * @returns The results fetched by this call only
 * @throws Error When the cursor returned by the datastore did not move
 */
export async function fetchResultsForQuery(
  datastores: Map<string, Datastore>,
  query: MultiQuery,
  pageSize: number,
  queryIteration: Iteration,
  opts?: {
    version_ordered?: boolean;
  },
): Promise<AnyObject[]> {
  const isVersionOrdered: boolean = opts?.version_ordered ?? false;
  const datastore = datastores.get(query.datastore)!;

  const { data: results, headers } = await datastore.walkNext(
    query.model,
    query.query,
    query.source,
    queryIteration.page,
    2 * pageSize,
    {
      cursor_last_id: queryIteration.cursor_last_id,
      cursor_last_correlation_id: queryIteration.cursor_last_correlation_id,
      current_version: queryIteration.version,
      version_ordered: isVersionOrdered,
      headers: query.headers,
    },
  );

  // Read defensively, like the cursor headers below.
  query.correlationField = headers?.['correlation-field'];

  if (results.length < pageSize && isVersionOrdered === false) {
    queryIteration.is_exhausted = true;
    queryIteration.results.push(...results);

    return results;
  }

  const cursorLastId = headers?.['cursor-last-id'] ?? '';
  const cursorLastCorrelationId = headers?.['cursor-last-correlation-id'] ?? '';

  // A cursor that did not move would make the walk loop forever.
  if (
    !!cursorLastId &&
    !!queryIteration.cursor_last_id &&
    cursorLastId === queryIteration.cursor_last_id &&
    cursorLastCorrelationId === queryIteration.cursor_last_correlation_id
  ) {
    throw new Error('Same cursor last id after iteration');
  }

  queryIteration.results.push(...results);

  queryIteration.page += 1;
  queryIteration.cursor_last_id = cursorLastId;
  queryIteration.cursor_last_correlation_id = cursorLastCorrelationId;

  if (results.length < pageSize) {
    if (queryIteration.version >= queryIteration.max_version) {
      // Last version fully read: nothing left for this query.
      queryIteration.is_exhausted = true;
    } else if (queryIteration.version < queryIteration.max_version) {
      // Current version fully read: restart paging on the next version.
      const nextVersion = await datastore.minEventsVersion(
        query.model,
        {
          ...query.query,
          version: {
            $gt: queryIteration.version,
          },
        },
        query.headers,
      );

      queryIteration.version = nextVersion;
      queryIteration.cursor_last_id = '';
      queryIteration.cursor_last_correlation_id = '';
      queryIteration.page = 0;
    }
  }

  return results;
}

/**
 * Invokes `handler` on every result, following one of three strategies:
 *
 * - `handle_in_order`: results are handled one at a time, each awaited before
 *   the next one, every result being its own batch;
 * - `handle_in_parallel`: all results are handled concurrently in one batch;
 * - default: results are accumulated in a batch handled concurrently; the
 *   batch is flushed whenever a result shares its
 *   `datastore/model/correlationId` key with a result already in the batch,
 *   so two results of the same correlation ID never run concurrently.
 *
 * @param results Items exposing `result` and `queryIndex`
 * @param queries Queries of the walk, indexed by `queryIndex`
 * @param iteration Iteration states, keyed by `queryIndex`
 * @param handler Callback receiving the result, its query, its iteration
 *   state, the batch ID and the index of the result within the batch
 * @param opts Handling strategy
 */
export async function handleResults(
  results: Array<AnyObject>,
  queries: MultiQuery[],
  iteration: Map<number, Iteration>,
  handler: (
    res: any,
    query: MultiQuery,
    queryIteration: Iteration,
    batch_id: number,
    index: number,
  ) => any,
  opts?: {
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
  },
): Promise<void> {
  let batch: Array<AnyObject> = [];
  let batchId = 0;
  let ids = new Set<string | number>();

  // Runs the handler concurrently on every result of the current batch.
  const flushBatch = (): Promise<unknown[]> =>
    Promise.all(
      batch.map(({ result, queryIndex }, index) =>
        handler(
          result,
          queries[queryIndex],
          iteration.get(queryIndex)!,
          batchId,
          index,
        ),
      ),
    );

  for (const [ri, r] of results.entries()) {
    if (opts?.handle_in_order === true) {
      await handler(
        r.result,
        queries[r.queryIndex],
        iteration.get(r.queryIndex)!,
        batchId,
        0,
      );

      batchId += 1;
      continue;
    }

    const query = queries[r.queryIndex];
    const correlationId = r.result[query.correlationField!];

    // In parallel mode the index is used as key: it is unique, hence the
    // batch is never flushed before the end of the results.
    const resultId =
      opts?.handle_in_parallel === true
        ? ri
        : `${query.datastore}/${query.model}/${correlationId}`;

    if (ids.has(resultId)) {
      await flushBatch();

      batchId += 1;
      batch = [];
      ids = new Set();
    }

    ids.add(resultId);
    batch.push(r);
  }

  await flushBatch();
}

/**
 * Refills the results buffer of every query that is neither exhausted nor
 * already holding at least `pageSize` results.
 *
 * Queries are fetched concurrently by chunks of `opts.chunk_size` (default
 * `5`), chunks being processed one after the other.
 */
async function fetchResultsForQueries(
  datastores: Map<string, Datastore>,
  iteration: Map<number, Iteration>,
  queries: MultiQuery[],
  pageSize: number,
  opts?: {
    sleep?: number;
    version_ordered?: boolean;
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
    chunk_size?: number;
  },
): Promise<void> {
  // A chunk size below 1 (or NaN) makes `chunk` return no batch at all:
  // nothing would ever be fetched and the walk would never terminate.
  const requestedChunkSize = opts?.chunk_size ?? 5;
  const chunkSize = requestedChunkSize >= 1 ? requestedChunkSize : 1;

  // Running index of the first query of the current batch. Tracked explicitly
  // because `chunk` floors fractional sizes.
  let offset = 0;

  for (const batch of chunk(queries, chunkSize)) {
    const batchOffset = offset;

    await Promise.all(
      batch.map(async (q, j): Promise<AnyObject[]> => {
        const queryIteration = iteration.get(batchOffset + j)!;

        if (queryIteration.is_exhausted === true) {
          return [];
        }

        if (queryIteration.results.length >= pageSize) {
          return queryIteration.results;
        }

        return fetchResultsForQuery(datastores, q, pageSize, queryIteration, {
          version_ordered: opts?.version_ordered,
        });
      }),
    );

    offset += batch.length;
  }
}

/**
 * Refills the per-query buffers, merges them in sorted order and returns at
 * most `pageSize` results. Returned results are removed from the head of the
 * buffer of the query they come from.
 */
async function fetchSortedResults(
  datastores: Map<string, Datastore>,
  iteration: Map<number, Iteration>,
  queries: MultiQuery[],
  pageSize: number,
  sortHandler: (a: any, b: any) => any,
  opts?: {
    sleep?: number;
    version_ordered?: boolean;
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
    chunk_size?: number;
  },
): Promise<SortableResult[]> {
  await fetchResultsForQueries(datastores, iteration, queries, pageSize, opts);

  const sortableResults: SortableResult[] = [];

  iteration.forEach((queryIteration, queryIndex) => {
    // Each buffer is sorted first so that its head is always the next result
    // picked for that query by the merged order below.
    sortResults(queryIteration.results, sortHandler);

    queryIteration.results.forEach((result, resultIndex) => {
      sortableResults.push({
        result,
        created_at: result.created_at,
        updated_at: result.updated_at,
        queryIndex,
        resultIndex,
      });
    });
  });

  sortResults(sortableResults, sortHandler);

  const results: SortableResult[] = [];

  for (const result of sortableResults) {
    if (!(results.length < pageSize)) {
      break;
    }

    iteration.get(result.queryIndex)!.results.shift();
    results.push(result);
  }

  return results;
}

/**
 * Re-fetches the queries from the state captured in `clonedIteration` and
 * reconciles the outcome into `iteration`.
 *
 * For every query, the refetched results located after the last occurrence of
 * the correlation ID of the last buffered result are appended to the buffer,
 * then page, cursors, version and exhaustion flag are copied from the cloned
 * state.
 *
 * @param datastores Registered datastores, keyed by name
 * @param iteration Live iteration states (mutated)
 * @param clonedIteration Iteration states to refetch from (results are
 *   discarded, then mutated by the fetch)
 * @param queries Queries of the walk
 * @param pageSize Page size of the walk
 * @param opts Walk options forwarded to the fetch
 */
export async function handleIterationWithMutation(
  datastores: Map<string, Datastore>,
  iteration: Map<number, Iteration>,
  clonedIteration: Map<number, Iteration>,
  queries: MultiQuery[],
  pageSize: number,
  opts?: {
    sleep?: number;
    version_ordered?: boolean;
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
    chunk_size?: number;
  },
): Promise<void> {
  // Clean previous results
  clonedIteration.forEach((queryIteration) => {
    queryIteration.results = [];
  });

  await fetchResultsForQueries(
    datastores,
    clonedIteration,
    queries,
    pageSize,
    opts,
  );

  /**
   * @todo check same state to avoid processing
   */
  // if (!isEqual(resultsAfterMutation, results)) {
  // }

  iteration.forEach((queryIteration, index) => {
    const clonedQueryIteration = clonedIteration.get(index)!;
    const correlationField = queries[index].correlationField as string;

    const lastMatchingCorrelationIdIndex = getLastMatchingCorrelationIdIndex(
      queryIteration.results.map((r) => r[correlationField]),
      clonedQueryIteration.results.map((r) => r[correlationField]),
    );

    const resultsToPush = clonedQueryIteration.results.slice(
      lastMatchingCorrelationIdIndex + 1,
    );

    queryIteration.results.push(...resultsToPush);
    queryIteration.cursor_last_correlation_id =
      clonedQueryIteration.cursor_last_correlation_id;
    queryIteration.cursor_last_id = clonedQueryIteration.cursor_last_id;
    queryIteration.page = clonedQueryIteration.page;
    queryIteration.version = clonedQueryIteration.version;
    queryIteration.is_exhausted = clonedQueryIteration.is_exhausted;
  });
}

/**
 * Returns the last index, in `after`, of the last correlation ID of `before`.
 *
 * @param before Correlation IDs known so far
 * @param after Correlation IDs to search in
 * @returns The matching index, or `-1` when `before` is empty or when its
 *   last correlation ID is not present in `after`
 */
export function getLastMatchingCorrelationIdIndex(
  before: Array<string>,
  after: Array<string>,
): number {
  // Without any known correlation ID there is nothing to match: looking up
  // `undefined` could otherwise hit an `undefined` entry of `after`.
  if (before.length === 0) {
    return -1;
  }

  const lastCorrelationId = before[before.length - 1];

  return after.lastIndexOf(lastCorrelationId);
}

/**
 * Validates the walk options against the queries.
 *
 * @throws Error When both `handle_in_order` and `handle_in_parallel` are set
 * @throws Error When `is_mutating` is set while a query targets `events`
 */
export function checkOptions(
  queries: MultiQuery[],
  opts?: {
    sleep?: number;
    version_ordered?: boolean;
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
    chunk_size?: number;
    is_mutating?: boolean;
  },
): void {
  if (opts?.handle_in_order === true && opts?.handle_in_parallel === true) {
    throw new Error(ERRORS.INCOMPATIBLE_MULTIPLE_HANDLE_OPTIONS);
  }

  if (
    opts?.is_mutating === true &&
    queries.some((query) => query.source === 'events')
  ) {
    throw new Error(ERRORS.INCOMPATIBLE_MUTATION_ON_EVENTS);
  }
}

/**
 * Walks several queries, possibly over several datastores, as a single
 * stream sorted by `sortHandler`, and calls `handler` on every result.
 *
 * Each round fetches the missing results of every query, merges them in
 * sorted order, hands at most `pageSize` of them to `handler`, then optionally
 * sleeps. Rounds repeat until every query is exhausted and every buffer is
 * empty.
 *
 * Notes:
 * - the `queries` are mutated: `created_at` and `updated_at` are added to an
 *   existing `_fields` projection, and `correlationField` is set on fetch;
 * - with `is_mutating`, a `datastore:model:correlationId` key is handled at
 *   most once and queries are refetched after each round from the state
 *   captured before the round.
 *
 * @param datastores Registered datastores, keyed by name
 * @param queries Queries to walk
 * @param pageSize Maximum number of results handled per round
 * @param handler Callback invoked on every result
 * @param opts.sleep Pause, in milliseconds, after each round
 * @param opts.version_ordered Walk the events version after version
 * @param opts.handle_in_order Handle results one by one
 * @param opts.handle_in_parallel Handle all results of a round concurrently
 * @param opts.chunk_size Number of queries fetched concurrently
 * @param opts.is_mutating The handler mutates the walked entities
 * @param sortHandler Comparator used to order the results
 * @throws Error When the options are incompatible (see `checkOptions`)
 */
export async function walkMulti(
  datastores: Map<string, Datastore>,
  queries: MultiQuery[],
  pageSize = 100,
  handler: (res: any, query: MultiQuery, queryIteration: Iteration) => any,
  opts?: {
    sleep?: number;
    version_ordered?: boolean;
    handle_in_order?: boolean;
    handle_in_parallel?: boolean;
    chunk_size?: number;
    is_mutating?: boolean;
  },
  sortHandler: (a: any, b: any) => any = defaultWalkMultiSortHandler,
): Promise<void> {
  checkOptions(queries, opts);

  // The sort relies on the dates: make sure a projection does not drop them.
  for (const q of queries) {
    if ('_fields' in q.query) {
      const projectedQuery = q.query as { _fields?: Record<string, unknown> };

      projectedQuery._fields = {
        ...projectedQuery._fields,
        created_at: 1,
        updated_at: 1,
      };
    }
  }

  const isVersionOrdered = opts?.version_ordered === true;
  const isMutating = opts?.is_mutating === true;

  const minVersions: number[] = isVersionOrdered
    ? await getMinVersions(datastores, queries)
    : queries.map(() => -1);
  const maxVersions: number[] = isVersionOrdered
    ? await getMaxVersions(datastores, queries)
    : queries.map(() => -1);

  const iteration: Map<number, Iteration> = new Map(
    queries.map((_, i) => [
      i,
      {
        query_index: i,
        page: 0,
        version: minVersions[i],
        max_version: maxVersions[i],
        is_exhausted: false,
        cursor_last_id: '',
        cursor_last_correlation_id: '',
        results: [],
      },
    ]),
  );

  const handledCorrelationIds = new Set<string>();

  let fullyExhausted: boolean;
  let clonedIteration: Map<number, Iteration> = new Map();

  do {
    if (isMutating) {
      // Snapshot of the state before the fetch, used to refetch after the
      // handler has run.
      clonedIteration = cloneDeep(iteration);
    }

    let results = await fetchSortedResults(
      datastores,
      iteration,
      queries,
      pageSize,
      sortHandler,
      opts,
    );

    if (isMutating) {
      results = results.filter((result) => {
        const query = queries[result.queryIndex];
        const correlationId = `${query.datastore}:${query.model}:${
          result.result[query.correlationField!]
        }`;

        if (handledCorrelationIds.has(correlationId)) {
          return false;
        }

        handledCorrelationIds.add(correlationId);

        return true;
      });
    }

    await handleResults(results, queries, iteration, handler, {
      handle_in_order: opts?.handle_in_order,
      handle_in_parallel: opts?.handle_in_parallel,
    });

    if (opts?.sleep && opts.sleep > 0) {
      await new Promise((resolve) => setTimeout(resolve, opts?.sleep));
    }

    // NOTE(review): evaluated before the mutation refetch below, so results
    // appended by that refetch do not reopen the loop — confirm intended.
    fullyExhausted = Array.from(iteration.values()).every(
      (it) => it.is_exhausted !== false && it.results.length === 0,
    );

    if (isMutating) {
      await handleIterationWithMutation(
        datastores,
        iteration,
        clonedIteration,
        queries,
        pageSize,
        opts,
      );
    }
  } while (fullyExhausted === false);
}