@finos/legend-extension-dsl-data-quality
Version:
Legend extension for Data Quality
750 lines (678 loc) • 24.3 kB
text/typescript
/**
* Copyright (c) 2026-present, Goldman Sachs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
action,
computed,
flow,
flowResult,
makeObservable,
observable,
} from 'mobx';
import {
type EditorStore,
ElementEditorState,
} from '@finos/legend-application-studio';
import {
type PackageableElement,
type ExecutionResult,
type RawLambda,
RawVariableExpression,
buildSourceInformationSourceId,
buildLambdaVariableExpressions,
observe_ValueSpecification,
VariableExpression,
GRAPH_MANAGER_EVENT,
ParserError,
RawLambda as RawLambdaCtor,
RelationTypeMetadata,
observe_RelationTypeMetadata,
} from '@finos/legend-graph';
import {
type GeneratorFn,
ActionState,
assertErrorThrown,
guaranteeType,
hashArray,
LogEvent,
StopWatch,
filterByType,
} from '@finos/legend-shared';
import {
buildExecutionParameterValues,
doesLambdaParameterStateContainFunctionValues,
ParameterInstanceValuesEditorState,
LambdaEditorState,
LambdaParametersState,
LambdaParameterState,
PARAMETER_SUBMIT_ACTION,
} from '@finos/legend-query-builder';
import {
type DataQualityRelationComparisonConfiguration,
type DataQualityRelationQueryLambda,
type ReconStrategy,
MD5HashStrategy,
} from '../../graph-manager/index.js';
import { DATA_QUALITY_HASH_STRUCTURE } from '../../graph/metamodel/DSL_DataQuality_HashUtils.js';
import { getDataQualityPureGraphManagerExtension } from '../../graph-manager/protocol/pure/DSL_DataQuality_PureGraphManagerExtension.js';
export type ComparisonSide = 'source' | 'target';
export enum RECONCILIATION_EXECUTION_TYPE {
RECONCILIATION = 'RECONCILIATION',
SOURCE_QUERY = 'SOURCE_QUERY',
TARGET_QUERY = 'TARGET_QUERY',
}
export const DEFAULT_LIMIT = 1000;
export class ComparisonLambdaEditorState extends LambdaEditorState {
readonly editorStore: EditorStore;
readonly queryLambda: DataQualityRelationQueryLambda;
readonly label: ComparisonSide;
readonly configurationState!: DataQualityRelationComparisonConfigurationState;
isConvertingFunctionBodyToString = false;
constructor(
configurationState: DataQualityRelationComparisonConfigurationState,
queryLambda: DataQualityRelationQueryLambda,
editorStore: EditorStore,
label: ComparisonSide,
) {
super('', '');
makeObservable(this, {
isConvertingFunctionBodyToString: observable,
});
this.queryLambda = queryLambda;
this.editorStore = editorStore;
this.label = label;
this.configurationState = configurationState;
}
get lambdaId(): string {
return buildSourceInformationSourceId([`comparison_${this.label}`]);
}
*convertLambdaGrammarStringToObject(): GeneratorFn<void> {
if (this.lambdaString) {
try {
const lambda =
(yield this.editorStore.graphManagerState.graphManager.pureCodeToLambda(
this.fullLambdaString,
this.lambdaId,
)) as RawLambda;
this.setParserError(undefined);
const lambdaParameters =
(lambda.parameters as object[] | undefined) ?? [];
this.queryLambda.parameters = lambdaParameters
.map((param) =>
this.editorStore.graphManagerState.graphManager.buildRawValueSpecification(
param,
this.editorStore.graphManagerState.graph,
),
)
.map((rawValueSpec) =>
guaranteeType(rawValueSpec, RawVariableExpression),
);
this.queryLambda.body = lambda.body;
// Refresh relation columns after a successful query update
yield flowResult(
this.configurationState.fetchColumnsForLambda(
this.queryLambda,
this.label,
),
);
} catch (error) {
assertErrorThrown(error);
if (error instanceof ParserError) {
this.setParserError(error);
}
this.editorStore.applicationStore.logService.error(
LogEvent.create(GRAPH_MANAGER_EVENT.PARSING_FAILURE),
error,
);
}
} else {
this.clearErrors();
this.queryLambda.body = new RawLambdaCtor(undefined, undefined).body;
this.queryLambda.parameters = [];
}
}
*convertLambdaObjectToGrammarString(options?: {
pretty?: boolean | undefined;
preserveCompilationError?: boolean | undefined;
firstLoad?: boolean | undefined;
}): GeneratorFn<void> {
this.isConvertingFunctionBodyToString = true;
try {
const lambdas = new Map<string, RawLambda>();
const functionLambda = this.configurationState.buildRawLambda(
this.queryLambda,
);
lambdas.set(this.lambdaId, functionLambda);
const isolatedLambdas =
(yield this.editorStore.graphManagerState.graphManager.lambdasToPureCode(
lambdas,
options?.pretty,
)) as Map<string, string>;
const grammarText = isolatedLambdas.get(this.lambdaId);
this.setLambdaString(grammarText ?? '');
if (!options?.firstLoad) {
this.clearErrors({
preserveCompilationError: options?.preserveCompilationError,
});
}
this.isConvertingFunctionBodyToString = false;
} catch (error) {
assertErrorThrown(error);
this.editorStore.applicationStore.logService.error(
LogEvent.create(GRAPH_MANAGER_EVENT.PARSING_FAILURE),
error,
);
this.isConvertingFunctionBodyToString = false;
}
}
get hashCode(): string {
return hashArray([
DATA_QUALITY_HASH_STRUCTURE.DATA_QUALITY_RELATION_FUNCTION_DEFINITION,
this.queryLambda.body ? JSON.stringify(this.queryLambda.body) : '',
]);
}
}
export class ComparisonParametersState extends LambdaParametersState {
readonly configurationState: DataQualityRelationComparisonConfigurationState;
constructor(
configurationState: DataQualityRelationComparisonConfigurationState,
) {
super();
makeObservable(this, {
parameterValuesEditorState: observable,
parameterStates: observable,
addParameter: action,
removeParameter: action,
openModal: action,
build: action,
setParameters: action,
});
this.configurationState = configurationState;
}
openModal(lambda: RawLambda, onSubmit: () => Promise<void>): void {
this.parameterStates = this.build(lambda);
this.parameterValuesEditorState.open(
(): Promise<void> =>
onSubmit().catch(
this.configurationState.editorStore.applicationStore
.alertUnhandledError,
),
PARAMETER_SUBMIT_ACTION.RUN,
);
}
build(lambda: RawLambda): LambdaParameterState[] {
const parameters = buildLambdaVariableExpressions(
lambda,
this.configurationState.editorStore.graphManagerState,
)
.map((parameter) =>
observe_ValueSpecification(
parameter,
this.configurationState.editorStore.changeDetectionState
.observerContext,
),
)
.filter(filterByType(VariableExpression));
const existingStatesByName = new Map(
this.parameterStates.map((parameterState) => [
parameterState.variableName,
parameterState,
]),
);
return parameters.map((variable) => {
const parameterState = new LambdaParameterState(
variable,
this.configurationState.editorStore.changeDetectionState.observerContext,
this.configurationState.editorStore.graphManagerState.graph,
);
const existingState = existingStatesByName.get(
parameterState.variableName,
);
if (existingState?.value) {
parameterState.setValue(existingState.value);
} else {
parameterState.mockParameterValue();
}
return parameterState;
});
}
}
export class DataQualityRelationComparisonConfigurationState extends ElementEditorState {
declare element: DataQualityRelationComparisonConfiguration;
sourceLambdaEditorState: ComparisonLambdaEditorState;
targetLambdaEditorState: ComparisonLambdaEditorState;
sourceColumnMetadata: RelationTypeMetadata = new RelationTypeMetadata();
targetColumnMetadata: RelationTypeMetadata = new RelationTypeMetadata();
lastSourceQueryHash: string | undefined = undefined;
lastTargetQueryHash: string | undefined = undefined;
// Column-fetch state
readonly fetchColumnsState = ActionState.create();
sourceColumnFetchError: string | undefined = undefined;
targetColumnFetchError: string | undefined = undefined;
// Execution state
currentExecutionType: RECONCILIATION_EXECUTION_TYPE | undefined = undefined;
lastExecutionType: RECONCILIATION_EXECUTION_TYPE | undefined = undefined;
executionResult?: ExecutionResult | undefined;
executionDuration?: number | undefined;
runPromise: Promise<ExecutionResult> | undefined = undefined;
limit = DEFAULT_LIMIT;
sourceParametersState: ComparisonParametersState;
targetParametersState: ComparisonParametersState;
comparisonParametersEditorState = new ParameterInstanceValuesEditorState();
constructor(editorStore: EditorStore, element: PackageableElement) {
super(editorStore, element);
this.element = element as DataQualityRelationComparisonConfiguration;
this.sourceLambdaEditorState = new ComparisonLambdaEditorState(
this,
this.element.source,
editorStore,
'source',
);
this.targetLambdaEditorState = new ComparisonLambdaEditorState(
this,
this.element.target,
editorStore,
'target',
);
this.sourceParametersState = new ComparisonParametersState(this);
this.targetParametersState = new ComparisonParametersState(this);
makeObservable(this, {
setKeys: action,
setColumnsToCompare: action,
setStrategy: action,
setSourceHashColumn: action,
setTargetHashColumn: action,
setAggregatedHash: action,
sourceColumnMetadata: observable,
targetColumnMetadata: observable,
lastSourceQueryHash: observable,
lastTargetQueryHash: observable,
sourceLambdaEditorState: observable,
targetLambdaEditorState: observable,
fetchColumnsForLambda: flow,
retryFetchColumns: flow,
sourceColumnFetchError: observable,
targetColumnFetchError: observable,
hasColumnFetchError: computed,
columnFetchError: computed,
hasNoOverlappingColumns: computed,
sourceColumnOptions: computed,
targetColumnOptions: computed,
combinedColumnOptions: computed,
// Execution observables
currentExecutionType: observable,
lastExecutionType: observable,
executionResult: observable,
executionDuration: observable,
runPromise: observable,
limit: observable,
isRunning: computed,
setExecutionResult: action,
setRunPromise: action,
setExecutionDuration: action,
setLimit: action,
handleRun: flow,
run: flow,
cancelRun: flow,
openComparisonParametersModal: action,
});
}
setKeys(keys: string[]): void {
this.element.keys = keys;
}
setColumnsToCompare(columns: string[]): void {
this.element.columnsToCompare = columns;
}
setStrategy(strategy: ReconStrategy): void {
this.element.strategy = strategy;
}
setSourceHashColumn(value: string | undefined): void {
guaranteeType(this.element.strategy, MD5HashStrategy).sourceHashColumn =
value;
}
setTargetHashColumn(value: string | undefined): void {
guaranteeType(this.element.strategy, MD5HashStrategy).targetHashColumn =
value;
}
setAggregatedHash(value: boolean | undefined): void {
guaranteeType(this.element.strategy, MD5HashStrategy).aggregatedHash =
value;
}
get sourceColumnOptions(): { value: string; label: string }[] {
return this.sourceColumnMetadata.columns.map((col) => ({
value: col.name,
label: col.name,
}));
}
get targetColumnOptions(): { value: string; label: string }[] {
return this.targetColumnMetadata.columns.map((col) => ({
value: col.name,
label: col.name,
}));
}
get combinedColumnOptions(): { value: string; label: string }[] {
return this.sourceColumnOptions.filter((srcOpt) =>
this.targetColumnOptions.some((tgtOpt) => tgtOpt.value === srcOpt.value),
);
}
get hasColumnFetchError(): boolean {
return (
this.sourceColumnFetchError !== undefined ||
this.targetColumnFetchError !== undefined
);
}
get columnFetchError(): string | undefined {
const errors = [
this.sourceColumnFetchError,
this.targetColumnFetchError,
].filter(Boolean);
return errors.length > 0 ? errors.join('; ') : undefined;
}
get hasNoOverlappingColumns(): boolean {
return (
!this.fetchColumnsState.isInProgress &&
!this.hasColumnFetchError &&
this.sourceColumnOptions.length > 0 &&
this.targetColumnOptions.length > 0 &&
this.combinedColumnOptions.length === 0
);
}
get isRunning(): boolean {
return this.currentExecutionType !== undefined;
}
setExecutionResult(
executionResult: ExecutionResult | undefined,
type: RECONCILIATION_EXECUTION_TYPE,
): void {
this.lastExecutionType = type;
this.executionResult = executionResult;
}
setRunPromise(promise: Promise<ExecutionResult> | undefined): void {
this.runPromise = promise;
}
setExecutionDuration(val: number | undefined): void {
this.executionDuration = val;
}
setLimit(val: number): void {
this.limit = Math.max(1, val);
}
private assertNoLetInjectionParameters(
type: RECONCILIATION_EXECUTION_TYPE,
): void {
const unsupportedSourceParameters =
type !== RECONCILIATION_EXECUTION_TYPE.TARGET_QUERY
? this.sourceParametersState.parameterStates.filter(
doesLambdaParameterStateContainFunctionValues,
)
: [];
const unsupportedTargetParameters =
type !== RECONCILIATION_EXECUTION_TYPE.SOURCE_QUERY
? this.targetParametersState.parameterStates.filter(
doesLambdaParameterStateContainFunctionValues,
)
: [];
if (
unsupportedSourceParameters.length === 0 &&
unsupportedTargetParameters.length === 0
) {
return;
}
const errors: string[] = [];
if (unsupportedSourceParameters.length > 0) {
errors.push(
`Source query parameters require function-value let injection (${unsupportedSourceParameters
.map((parameterState) => parameterState.variableName)
.join(', ')}), which reconciliation execution does not support.`,
);
}
if (unsupportedTargetParameters.length > 0) {
errors.push(
`Target query parameters require function-value let injection (${unsupportedTargetParameters
.map((parameterState) => parameterState.variableName)
.join(', ')}), which reconciliation execution does not support.`,
);
}
throw new Error(errors.join(' '));
}
buildRawLambda(queryLambda: DataQualityRelationQueryLambda): RawLambdaCtor {
const serializedParams = queryLambda.parameters.map((parameter) =>
this.editorStore.graphManagerState.graphManager.serializeRawValueSpecification(
parameter,
),
);
return new RawLambdaCtor(serializedParams, queryLambda.body);
}
private buildSourceLambda(): RawLambdaCtor {
return this.buildRawLambda(this.element.source);
}
private buildTargetLambda(): RawLambdaCtor {
return this.buildRawLambda(this.element.target);
}
private get sourceHasParameters(): boolean {
const params = (this.buildSourceLambda().parameters ?? []) as object[];
return params.length > 0;
}
private get targetHasParameters(): boolean {
const params = (this.buildTargetLambda().parameters ?? []) as object[];
return params.length > 0;
}
openComparisonParametersModal(onSubmit: () => Promise<void>): void {
this.sourceParametersState.setParameters(
this.sourceParametersState.build(this.buildSourceLambda()),
);
this.targetParametersState.setParameters(
this.targetParametersState.build(this.buildTargetLambda()),
);
this.comparisonParametersEditorState.open(
(): Promise<void> =>
onSubmit().catch(this.editorStore.applicationStore.alertUnhandledError),
PARAMETER_SUBMIT_ACTION.RUN,
);
}
*handleRun(type: RECONCILIATION_EXECUTION_TYPE): GeneratorFn<void> {
if (this.isRunning) {
return;
}
const needsSourceParams =
this.sourceHasParameters &&
(type === RECONCILIATION_EXECUTION_TYPE.RECONCILIATION ||
type === RECONCILIATION_EXECUTION_TYPE.SOURCE_QUERY);
const needsTargetParams =
this.targetHasParameters &&
(type === RECONCILIATION_EXECUTION_TYPE.RECONCILIATION ||
type === RECONCILIATION_EXECUTION_TYPE.TARGET_QUERY);
if (needsSourceParams && needsTargetParams) {
this.openComparisonParametersModal(
(): Promise<void> => flowResult(this.run(type)),
);
} else if (needsSourceParams) {
this.sourceParametersState.openModal(
this.buildSourceLambda(),
(): Promise<void> => flowResult(this.run(type)),
);
} else if (needsTargetParams) {
this.targetParametersState.openModal(
this.buildTargetLambda(),
(): Promise<void> => flowResult(this.run(type)),
);
} else {
yield flowResult(this.run(type));
}
}
*run(type: RECONCILIATION_EXECUTION_TYPE): GeneratorFn<void> {
let promise: Promise<ExecutionResult> | undefined = undefined;
const stopWatch = new StopWatch();
try {
this.currentExecutionType = type;
const model = this.editorStore.graphManagerState.graph;
const extension = getDataQualityPureGraphManagerExtension(
this.editorStore.graphManagerState.graphManager,
);
const md5Strategy = guaranteeType(this.element.strategy, MD5HashStrategy);
this.assertNoLetInjectionParameters(type);
const sourceExecutionLambda = this.buildSourceLambda();
const targetExecutionLambda = this.buildTargetLambda();
const sourceParamValues = this.sourceHasParameters
? buildExecutionParameterValues(
this.sourceParametersState.parameterStates,
this.editorStore.graphManagerState,
)
: [];
const targetParamValues = this.targetHasParameters
? buildExecutionParameterValues(
this.targetParametersState.parameterStates,
this.editorStore.graphManagerState,
)
: [];
if (type === RECONCILIATION_EXECUTION_TYPE.RECONCILIATION) {
promise = extension.runReconciliation(model, {
source: sourceExecutionLambda,
target: targetExecutionLambda,
keys: this.element.keys,
colsForHash: this.element.columnsToCompare,
limit: this.limit,
aggregatedHash: md5Strategy.aggregatedHash,
sourceHashCol: md5Strategy.sourceHashColumn,
targetHashCol: md5Strategy.targetHashColumn,
// make sure we fetch all columns we compare so users can see the differences
includeColumnValues: true,
sourceLambdaParameterValues: sourceParamValues,
targetLambdaParameterValues: targetParamValues,
});
} else if (type === RECONCILIATION_EXECUTION_TYPE.SOURCE_QUERY) {
promise = extension.runReconciliationSourceQuery(model, {
source: sourceExecutionLambda,
target: targetExecutionLambda,
keys: this.element.keys,
limit: this.limit,
colsForHash: this.element.columnsToCompare,
sourceLambdaParameterValues: sourceParamValues,
});
} else {
promise = extension.runReconciliationTargetQuery(model, {
source: sourceExecutionLambda,
target: targetExecutionLambda,
keys: this.element.keys,
limit: this.limit,
colsForHash: this.element.columnsToCompare,
targetLambdaParameterValues: targetParamValues,
});
}
this.setRunPromise(promise);
const result = (yield promise) as ExecutionResult;
if (this.runPromise === promise) {
this.setExecutionResult(result, type);
this.setExecutionDuration(stopWatch.elapsed);
}
} catch (error) {
if (this.runPromise === promise) {
assertErrorThrown(error);
this.setExecutionResult(undefined, type);
this.editorStore.applicationStore.logService.error(
LogEvent.create(GRAPH_MANAGER_EVENT.EXECUTION_FAILURE),
error,
);
this.editorStore.applicationStore.notificationService.notifyError(
error,
);
}
} finally {
this.currentExecutionType = undefined;
}
}
*cancelRun(): GeneratorFn<void> {
this.currentExecutionType = undefined;
this.setRunPromise(undefined);
try {
yield this.editorStore.graphManagerState.graphManager.cancelUserExecutions(
true,
);
} catch (error) {
this.editorStore.applicationStore.logService.error(
LogEvent.create(GRAPH_MANAGER_EVENT.EXECUTION_FAILURE),
error,
);
}
}
*fetchColumnsForLambda(
queryLambda: DataQualityRelationQueryLambda,
side: ComparisonSide,
): GeneratorFn<void> {
const { body } = queryLambda;
if (!body || (Array.isArray(body) && body.length === 0)) {
return;
}
const lambda = this.buildRawLambda(queryLambda);
const editorState =
side === 'source'
? this.sourceLambdaEditorState
: this.targetLambdaEditorState;
const currentQueryHash = editorState.hashCode;
const lastHash =
side === 'source' ? this.lastSourceQueryHash : this.lastTargetQueryHash;
if (currentQueryHash === lastHash) {
return;
}
this.fetchColumnsState.inProgress();
try {
const metadata = observe_RelationTypeMetadata(
(yield this.editorStore.graphManagerState.graphManager.getLambdaRelationType(
lambda,
this.editorStore.graphManagerState.graph,
)) as RelationTypeMetadata,
);
if (side === 'source') {
this.sourceColumnMetadata = metadata;
this.lastSourceQueryHash = currentQueryHash;
this.sourceColumnFetchError = undefined;
} else {
this.targetColumnMetadata = metadata;
this.lastTargetQueryHash = currentQueryHash;
this.targetColumnFetchError = undefined;
}
} catch (error) {
assertErrorThrown(error);
// Update the hash even on failure so that reverting to a previously
// successful query will see a different hash and trigger a refetch.
if (side === 'source') {
this.lastSourceQueryHash = currentQueryHash;
this.sourceColumnFetchError = `Failed to fetch source relation columns: ${error.message}`;
} else {
this.lastTargetQueryHash = currentQueryHash;
this.targetColumnFetchError = `Failed to fetch target relation columns: ${error.message}`;
}
} finally {
this.fetchColumnsState.complete();
}
}
*retryFetchColumns(): GeneratorFn<void> {
// Reset hashes to force a refetch
this.lastSourceQueryHash = undefined;
this.lastTargetQueryHash = undefined;
this.sourceColumnFetchError = undefined;
this.targetColumnFetchError = undefined;
yield flowResult(this.fetchColumnsForLambda(this.element.source, 'source'));
yield flowResult(this.fetchColumnsForLambda(this.element.target, 'target'));
}
override reprocess(
newElement: PackageableElement,
editorStore: EditorStore,
): ElementEditorState {
return new DataQualityRelationComparisonConfigurationState(
editorStore,
newElement,
);
}
}