// @finos/legend-extension-dsl-data-quality
// Version: (not captured)
// Legend extension for Data Quality
// 360 lines • 19.9 kB — JavaScript
/**
* Copyright (c) 2020-present, Goldman Sachs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { V1_getEngineSerializationFormat, LegendSDLC, PureClientVersion, V1_buildExecutionError, V1_buildExecutionResult, V1_ExecutionError, V1_GraphBuilderContextBuilder, V1_GraphTransformerContextBuilder, V1_LegendSDLC, V1_ProcessingContext, V1_Protocol, V1_PureGraphManager, V1_PureModelContextPointer, V1_pureModelContextPropSchema, V1_deserializeExecutionResult, V1_parameterValueModelSchema, V1_transformParameterValue, V1_transformRawLambda, V1_RemoteEngine, V1_rawLambdaModelSchema, } from '@finos/legend-graph';
import { createModelSchema, list, optional, primitive } from 'serializr';
import { assertErrorThrown, customListWithSchema, guaranteeNonNullable, guaranteeType, NetworkClientError, returnUndefOnError, SerializationFactory, UnsupportedOperationError, usingModelSchema, } from '@finos/legend-shared';
import { DSL_DataQuality_PureGraphManagerExtension } from '../DSL_DataQuality_PureGraphManagerExtension.js';
import { V1_buildDataQualityGraphFetchTree, V1_transformRootGraphFetchTreeToDataQualityRootGraphFetchTree, } from './transformation/V1_DSL_DataQuality_ValueSpecificationBuilderHelper.js';
// Activity names passed to `engineServerClient.getTraceData(...)` by the
// data-quality engine calls in this file. Each one is paired with a single
// `/dataquality/*` endpoint, noted next to it.

// Used with `/dataquality/generatePlan`.
const DQ_GENERATE_EXECUTION_PLAN = 'generate execution plan';
// Used with `/dataquality/execute`.
const DQ_EXECUTE_PLAN = 'execute plan';
// Used with `/dataquality/profile`.
const DQ_EXECUTE_DATA_PROFILING = 'execute data profiling';
// Used with `/dataquality/ruleSuggestions`.
const DQ_FETCH_RULE_SUGGESTIONS = 'fetch rule suggestions';
// Used with `/dataquality/debugPlan`.
const DQ_DEBUG_EXECUTION_PLAN = 'debug execution plan';
// Used with `/dataquality/propertyPathTree`.
const DQ_FETCH_PROPERTY_PATH_TREE = 'dq fetch property path tree';
// Used with `/dataquality/reconciliation`.
const DQ_EXECUTE_RECONCILIATION = 'execute reconciliation';
/**
 * Payload describing a data-quality execution request.
 *
 * Instances start with every field undefined except `lambdaParameterValues`,
 * which starts as an empty list. The static `serialization` factory holds the
 * model schema used to turn an instance into JSON.
 */
export class V1_DQExecuteInput {
  clientVersion;
  model;
  lambdaParameterValues = [];
  packagePath;
  defectsLimit;
  queryLimit;
  // Declared on the instance, but has no entry in the serialization schema.
  allValidationsChecked;
  validationName;
  runQuery;

  /** Model schema for this payload; only `model`, `lambdaParameterValues` and `packagePath` are non-optional. */
  static serialization = new SerializationFactory(
    createModelSchema(V1_DQExecuteInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      lambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
      packagePath: primitive(),
      defectsLimit: optional(primitive()),
      queryLimit: optional(primitive()),
      validationName: optional(primitive()),
      runQuery: optional(primitive()),
    }),
  );
}
/**
 * Payload describing a rule-suggestion request.
 *
 * Carries the client version, the model context, the lambda parameter values
 * (an empty list by default) and the package path of the target element.
 */
export class V1_DQRuleSuggestionInput {
  clientVersion;
  model;
  lambdaParameterValues = [];
  packagePath;

  /** Model schema for this payload; only `clientVersion` is optional. */
  static serialization = new SerializationFactory(
    createModelSchema(V1_DQRuleSuggestionInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      lambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
      packagePath: primitive(),
    }),
  );
}
/**
 * Payload describing a reconciliation request between a `source` and a
 * `target` lambda.
 *
 * The list-valued fields (`keys`, `colsForHash` and the two lambda parameter
 * value lists) start empty; every other field starts undefined.
 */
export class V1_DQReconciliationInput {
  clientVersion;
  model;
  source;
  target;
  keys = [];
  colsForHash = [];
  defectLimit;
  queryLimit;
  aggregatedHash;
  sourceHashCol;
  targetHashCol;
  includeColumnValues;
  runSourceQuery;
  runTargetQuery;
  sourceLambdaParameterValues = [];
  targetLambdaParameterValues = [];

  /** Model schema for this payload; scalar tuning fields are all optional. */
  static serialization = new SerializationFactory(
    createModelSchema(V1_DQReconciliationInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      source: usingModelSchema(V1_rawLambdaModelSchema),
      target: usingModelSchema(V1_rawLambdaModelSchema),
      keys: list(primitive()),
      colsForHash: list(primitive()),
      defectLimit: optional(primitive()),
      queryLimit: optional(primitive()),
      aggregatedHash: optional(primitive()),
      sourceHashCol: optional(primitive()),
      targetHashCol: optional(primitive()),
      includeColumnValues: optional(primitive()),
      runSourceQuery: optional(primitive()),
      runTargetQuery: optional(primitive()),
      sourceLambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
      targetLambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
    }),
  );
}
/**
 * Normalize an error caught around an engine call into the error to re-throw.
 *
 * `assertErrorThrown` is applied first. A `NetworkClientError` is converted
 * into an execution error built from its payload; any other error is returned
 * unchanged.
 *
 * @param error the value caught by the surrounding `catch`
 * @returns the error the caller should throw
 */
function DQ_toExecutionError(error) {
  assertErrorThrown(error);
  return error instanceof NetworkClientError
    ? V1_buildExecutionError(
        V1_ExecutionError.serialization.fromJson(error.payload),
      )
    : error;
}

/**
 * Build an execution result from the raw text of an engine response.
 *
 * The engine's `parseExecutionResults` is tried first; when that attempt
 * produces no value, the raw text itself is handed to the deserializer.
 *
 * @param extension the graph manager extension whose engine does the parsing
 * @param resultText the response body as text
 */
function DQ_buildExecutionResultFromText(extension, resultText) {
  const rawExecutionResult =
    returnUndefOnError(() =>
      extension.graphManager.engine.parseExecutionResults(
        resultText,
        undefined,
      ),
    ) ?? resultText;
  return V1_buildExecutionResult(
    V1_deserializeExecutionResult(rawExecutionResult),
  );
}

/**
 * Get the engine server client, requiring the engine to be a `V1_RemoteEngine`.
 *
 * @param extension the graph manager extension whose engine is inspected
 * @param operationName name used in the "only supported by remote engine" message
 */
function DQ_getEngineServerClient(extension, operationName) {
  // TODO: improve abstraction so that we do not need to access the engine server client directly
  return guaranteeType(
    extension.graphManager.engine,
    V1_RemoteEngine,
    `${operationName} is only supported by remote engine`,
  ).getEngineServerClient();
}

/**
 * Pick the model context sent with a request: an SDLC pointer when the graph
 * has an origin, otherwise the full graph model data.
 */
function DQ_resolveModelContext(extension, graph) {
  return graph.origin
    ? extension.buildPureModelSDLCPointer(graph.origin, undefined)
    : extension.graphManager.getFullGraphModelData(graph);
}

/**
 * Build the `V1_DQRuleSuggestionInput` shared by `exportDataProfiling` and
 * `fetchValidationSuggestions`.
 */
function DQ_createRuleSuggestionInput(extension, graph, packagePath, options) {
  const input = new V1_DQRuleSuggestionInput();
  input.packagePath = packagePath;
  input.clientVersion =
    options.clientVersion ??
    V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
  input.model = DQ_resolveModelContext(extension, graph);
  input.lambdaParameterValues = options.lambdaParameterValues
    ? options.lambdaParameterValues.map(V1_transformParameterValue)
    : [];
  return input;
}

/**
 * V1 protocol implementation of the data-quality graph manager extension.
 *
 * Every engine-facing operation requires the graph manager's engine to be a
 * `V1_RemoteEngine` and goes through that engine's server client.
 */
export class V1_DSL_Data_Quality_PureGraphManagerExtension extends DSL_DataQuality_PureGraphManagerExtension {
  /** Client version used when the caller's options do not provide one. */
  static DEV_PROTOCOL_VERSION = PureClientVersion.VX_X_X;

  /**
   * @param graphManager must be a `V1_PureGraphManager` (enforced with `guaranteeType`)
   */
  constructor(graphManager) {
    super(graphManager);
    this.graphManager = guaranteeType(graphManager, V1_PureGraphManager);
  }

  getSupportedProtocolVersion() {
    return PureClientVersion.V1_0_0;
  }

  /**
   * Build a model context pointer for a graph origin.
   *
   * @param origin graph origin; only `LegendSDLC` is supported
   * @param clientVersion when truthy, a protocol entry is attached to the pointer
   * @throws UnsupportedOperationError for any other kind of origin
   */
  buildPureModelSDLCPointer(origin, clientVersion) {
    if (origin instanceof LegendSDLC) {
      return new V1_PureModelContextPointer(
        clientVersion
          ? new V1_Protocol(
              V1_PureGraphManager.PURE_PROTOCOL_NAME,
              clientVersion,
            )
          : undefined,
        new V1_LegendSDLC(origin.groupId, origin.artifactId, origin.versionId),
      );
    }
    throw new UnsupportedOperationError('Unsupported graph origin');
  }

  /**
   * Send an already-serialized execution input to `/dataquality/execute`.
   *
   * @param input serialized request payload
   * @param options optional `serializationFormat` and `returnAsResponse`;
   *   `returnAsResponse` is forwarded as the `skipProcessing` flag
   */
  executeValidation = (input, options) => {
    const engineServerClient = DQ_getEngineServerClient(
      this,
      'executeValidation',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_EXECUTE_PLAN),
      `${engineServerClient._pure()}/dataquality/execute`,
      input,
      {},
      undefined,
      {
        serializationFormat: options?.serializationFormat
          ? V1_getEngineSerializationFormat(options.serializationFormat)
          : undefined,
      },
      { enableCompression: true },
      {
        skipProcessing: Boolean(options?.returnAsResponse),
      },
    );
  };

  /** Run a validation and return the response body as text. */
  async runValidationAndReturnString(input) {
    const response = await this.executeValidation(
      V1_DQExecuteInput.serialization.toJson(input),
      {
        returnAsResponse: true,
      },
    );
    return response.text();
  }

  /**
   * Run a validation and return the unprocessed response.
   *
   * @throws an execution error when the request fails with a `NetworkClientError`
   */
  async export(input, options) {
    try {
      return guaranteeNonNullable(
        await this.executeValidation(
          V1_DQExecuteInput.serialization.toJson(input),
          {
            serializationFormat: options?.serializationFormat,
            returnAsResponse: true,
          },
        ),
      );
    } catch (error) {
      throw DQ_toExecutionError(error);
    }
  }

  /**
   * Populate `dqExecuteInput` from the graph and the caller's options and
   * return the same instance.
   *
   * `queryLimit` is only set when `options.runQuery` is truthy, and
   * `validationName` only when `options.allValidationsChecked` is falsy.
   */
  createExecutionInput(graph, packagePath, dqExecuteInput, options) {
    dqExecuteInput.clientVersion =
      options.clientVersion ??
      V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
    dqExecuteInput.model = DQ_resolveModelContext(this, graph);
    dqExecuteInput.lambdaParameterValues = options.lambdaParameterValues
      ? options.lambdaParameterValues.map(V1_transformParameterValue)
      : [];
    dqExecuteInput.packagePath = packagePath;
    dqExecuteInput.defectsLimit = options.previewLimit;
    dqExecuteInput.runQuery = options.runQuery;
    if (options.runQuery) {
      dqExecuteInput.queryLimit = options.queryLimit;
    }
    if (!options.allValidationsChecked) {
      dqExecuteInput.validationName = options.validationName;
    }
    return dqExecuteInput;
  }

  /** Post the execution input to `/dataquality/generatePlan` and return the response. */
  generatePlan = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const engineServerClient = DQ_getEngineServerClient(this, 'generatePlan');
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_GENERATE_EXECUTION_PLAN),
      `${engineServerClient._pure()}/dataquality/generatePlan`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
  };

  /**
   * Run a validation and build an execution result from the response text.
   *
   * @throws an execution error when the request fails with a `NetworkClientError`
   */
  execute = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    try {
      const validationResultInText =
        await this.runValidationAndReturnString(input);
      return DQ_buildExecutionResultFromText(this, validationResultInText);
    } catch (error) {
      throw DQ_toExecutionError(error);
    }
  };

  /** Build the execution input and delegate to `export`. */
  exportData = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    return this.export(input, options);
  };

  /**
   * Post the execution input to `/dataquality/debugPlan`.
   *
   * @returns the response's `plan` plus its `debug` entries joined with newlines
   */
  debugExecutionPlanGeneration = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const engineServerClient = DQ_getEngineServerClient(
      this,
      'debugExecutionPlanGeneration',
    );
    const result = await engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_DEBUG_EXECUTION_PLAN),
      `${engineServerClient._pure()}/dataquality/debugPlan`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
    return {
      plan: result.plan,
      debug: result.debug.join('\n'),
    };
  };

  /**
   * Fetch the property path tree from `/dataquality/propertyPathTree` and
   * build it into a data-quality graph fetch tree against `graph`.
   */
  fetchStructuralValidations = async (graph, packagePath, options) => {
    // The remote-engine check happens before the input is built.
    const engineServerClient = DQ_getEngineServerClient(
      this,
      'fetchStructuralValidations',
    );
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const V1_rootGraphFetchTree = await engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_FETCH_PROPERTY_PATH_TREE),
      `${engineServerClient._pure()}/dataquality/propertyPathTree`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
    const V1_dataQualityRootGraphFetchTree =
      V1_transformRootGraphFetchTreeToDataQualityRootGraphFetchTree(
        V1_rootGraphFetchTree,
      );
    const context = new V1_GraphBuilderContextBuilder(
      graph,
      graph,
      this.graphManager.graphBuilderExtensions,
      this.graphManager.logService,
    ).build();
    return V1_buildDataQualityGraphFetchTree(
      V1_dataQualityRootGraphFetchTree,
      context,
      undefined,
      [],
      new V1_ProcessingContext(''),
      true,
    );
  };

  /**
   * Send an already-serialized input to `/dataquality/profile`.
   *
   * @param input serialized request payload
   * @param options optional `serializationFormat` and `returnAsResponse`;
   *   `returnAsResponse` is forwarded as the `skipProcessing` flag
   */
  executeDataProfiling = (input, options) => {
    const engineServerClient = DQ_getEngineServerClient(
      this,
      'executeDataProfiling',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_EXECUTE_DATA_PROFILING),
      `${engineServerClient._pure()}/dataquality/profile`,
      input,
      {},
      undefined,
      {
        serializationFormat: options?.serializationFormat
          ? V1_getEngineSerializationFormat(options.serializationFormat)
          : undefined,
      },
      { enableCompression: true },
      {
        skipProcessing: Boolean(options?.returnAsResponse),
      },
    );
  };

  /**
   * Run data profiling and build an execution result from the response text.
   *
   * @throws an execution error when the request fails with a `NetworkClientError`
   */
  runDataProfiling = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    try {
      const profilingResult = await this.executeDataProfiling(
        V1_DQExecuteInput.serialization.toJson(input),
        {
          returnAsResponse: true,
        },
      );
      const profilingResultInText = await profilingResult.text();
      return DQ_buildExecutionResultFromText(this, profilingResultInText);
    } catch (error) {
      throw DQ_toExecutionError(error);
    }
  };

  /**
   * Run data profiling with a rule-suggestion style input and return the
   * unprocessed response.
   *
   * @throws an execution error when the request fails with a `NetworkClientError`
   */
  exportDataProfiling = async (graph, packagePath, options) => {
    const input = DQ_createRuleSuggestionInput(
      this,
      graph,
      packagePath,
      options,
    );
    try {
      return guaranteeNonNullable(
        await this.executeDataProfiling(
          // Serialize with the schema of the class that was actually built.
          V1_DQRuleSuggestionInput.serialization.toJson(input),
          {
            serializationFormat: options.serializationFormat,
            returnAsResponse: true,
          },
        ),
      );
    } catch (error) {
      throw DQ_toExecutionError(error);
    }
  };

  /** Post a rule-suggestion input to `/dataquality/ruleSuggestions` and return the response. */
  fetchValidationSuggestions = async (graph, packagePath, options) => {
    const input = DQ_createRuleSuggestionInput(
      this,
      graph,
      packagePath,
      options,
    );
    const engineServerClient = DQ_getEngineServerClient(
      this,
      'fetchValidationSuggestions',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_FETCH_RULE_SUGGESTIONS),
      `${engineServerClient._pure()}/dataquality/ruleSuggestions`,
      // Serialize with the schema of the class that was actually built.
      V1_DQRuleSuggestionInput.serialization.toJson(input),
      {},
      undefined,
      {},
      { enableCompression: true },
      {},
    );
  };

  /** Transform a raw lambda into its V1 protocol form using the registered protocol processor plugins. */
  rawLambdaToV1(lambda) {
    return V1_transformRawLambda(
      lambda,
      new V1_GraphTransformerContextBuilder(
        this.graphManager.pluginManager.getPureProtocolProcessorPlugins(),
      ).build(),
    );
  }

  /**
   * Build a reconciliation input from the graph and the caller's options.
   *
   * `options.limit` is stored as `queryLimit` when either the source or the
   * target query is being run, and as `defectLimit` otherwise.
   */
  createReconciliationInput(graph, options) {
    const input = new V1_DQReconciliationInput();
    input.clientVersion =
      options.clientVersion ??
      V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
    input.model = DQ_resolveModelContext(this, graph);
    // Logical OR, not `??`: an explicit `runSourceQuery: false` must not hide
    // a truthy `runTargetQuery`.
    const runningSourceOrTargetQuery = Boolean(
      options.runSourceQuery || options.runTargetQuery,
    );
    input.source = this.rawLambdaToV1(options.source);
    input.target = this.rawLambdaToV1(options.target);
    input.keys = options.keys;
    input.colsForHash = options.colsForHash;
    input.aggregatedHash = options.aggregatedHash;
    input.sourceHashCol = options.sourceHashCol;
    input.targetHashCol = options.targetHashCol;
    input.includeColumnValues = options.includeColumnValues;
    input.runSourceQuery = options.runSourceQuery;
    input.runTargetQuery = options.runTargetQuery;
    if (runningSourceOrTargetQuery) {
      input.queryLimit = options.limit;
    } else {
      input.defectLimit = options.limit;
    }
    if (options.sourceLambdaParameterValues) {
      input.sourceLambdaParameterValues =
        options.sourceLambdaParameterValues.map(V1_transformParameterValue);
    }
    if (options.targetLambdaParameterValues) {
      input.targetLambdaParameterValues =
        options.targetLambdaParameterValues.map(V1_transformParameterValue);
    }
    return input;
  }

  /** Run reconciliation with the options exactly as given. */
  runReconciliation = async (graph, options) => {
    const input = this.createReconciliationInput(graph, options);
    return this.runReconciliationWithInput(input);
  };

  /** Run reconciliation with `runSourceQuery` forced on and `runTargetQuery` cleared. */
  runReconciliationSourceQuery = async (graph, options) => {
    const input = this.createReconciliationInput(graph, {
      ...options,
      runSourceQuery: true,
      runTargetQuery: undefined,
    });
    return this.runReconciliationWithInput(input);
  };

  /** Run reconciliation with `runTargetQuery` forced on and `runSourceQuery` cleared. */
  runReconciliationTargetQuery = async (graph, options) => {
    const input = this.createReconciliationInput(graph, {
      ...options,
      runSourceQuery: undefined,
      runTargetQuery: true,
    });
    return this.runReconciliationWithInput(input);
  };

  /**
   * Post a reconciliation input to `/dataquality/reconciliation` and build an
   * execution result from the response text.
   *
   * @throws an execution error when the request fails with a `NetworkClientError`
   */
  async runReconciliationWithInput(input) {
    try {
      const engineServerClient = DQ_getEngineServerClient(
        this,
        'runReconciliation',
      );
      const result = await engineServerClient.postWithTracing(
        engineServerClient.getTraceData(DQ_EXECUTE_RECONCILIATION),
        `${engineServerClient._pure()}/dataquality/reconciliation`,
        V1_DQReconciliationInput.serialization.toJson(input),
        {},
        undefined,
        undefined,
        { enableCompression: true },
        { skipProcessing: true },
      );
      const resultInText = await result.text();
      return DQ_buildExecutionResultFromText(this, resultInText);
    } catch (error) {
      throw DQ_toExecutionError(error);
    }
  }
}
//# sourceMappingURL=V1_DSL_Data_Quality_PureGraphManagerExtension.js.map