UNPKG

@finos/legend-extension-dsl-data-quality

Version:
360 lines 19.9 kB
/**
 * Copyright (c) 2020-present, Goldman Sachs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import {
  V1_getEngineSerializationFormat,
  LegendSDLC,
  PureClientVersion,
  V1_buildExecutionError,
  V1_buildExecutionResult,
  V1_ExecutionError,
  V1_GraphBuilderContextBuilder,
  V1_GraphTransformerContextBuilder,
  V1_LegendSDLC,
  V1_ProcessingContext,
  V1_Protocol,
  V1_PureGraphManager,
  V1_PureModelContextPointer,
  V1_pureModelContextPropSchema,
  V1_deserializeExecutionResult,
  V1_parameterValueModelSchema,
  V1_transformParameterValue,
  V1_transformRawLambda,
  V1_RemoteEngine,
  V1_rawLambdaModelSchema,
} from '@finos/legend-graph';
import { createModelSchema, list, optional, primitive } from 'serializr';
import {
  assertErrorThrown,
  customListWithSchema,
  guaranteeNonNullable,
  guaranteeType,
  NetworkClientError,
  returnUndefOnError,
  SerializationFactory,
  UnsupportedOperationError,
  usingModelSchema,
} from '@finos/legend-shared';
import { DSL_DataQuality_PureGraphManagerExtension } from '../DSL_DataQuality_PureGraphManagerExtension.js';
import {
  V1_buildDataQualityGraphFetchTree,
  V1_transformRootGraphFetchTreeToDataQualityRootGraphFetchTree,
} from './transformation/V1_DSL_DataQuality_ValueSpecificationBuilderHelper.js';

// Names handed to `getTraceData()` for each data-quality engine call.
const DQ_GENERATE_EXECUTION_PLAN = 'generate execution plan';
const DQ_EXECUTE_PLAN = 'execute plan';
const DQ_EXECUTE_DATA_PROFILING = 'execute data profiling';
const DQ_FETCH_RULE_SUGGESTIONS = 'fetch rule suggestions';
const DQ_DEBUG_EXECUTION_PLAN = 'debug execution plan';
const DQ_FETCH_PROPERTY_PATH_TREE = 'dq fetch property path tree';
const DQ_EXECUTE_RECONCILIATION = 'execute reconciliation';

/**
 * Returns the engine server client of the graph manager's engine.
 *
 * TODO: improve abstraction so that we do not need to access the engine server client directly
 *
 * @param graphManager graph manager whose `engine` is checked against `V1_RemoteEngine`
 * @param operationName name used as the prefix of the failure message
 * @returns the result of `getEngineServerClient()` on the remote engine
 */
const getRemoteEngineServerClient = (graphManager, operationName) =>
  guaranteeType(
    graphManager.engine,
    V1_RemoteEngine,
    `${operationName} is only supported by remote engine`,
  ).getEngineServerClient();

/**
 * Maps a caught value to the error that should be rethrown: a
 * `NetworkClientError` is converted into an execution error built from its
 * payload, any other error is returned unchanged.
 *
 * @param error value caught by a `catch` clause
 * @returns the error to throw
 */
const toExecutionError = (error) => {
  assertErrorThrown(error);
  if (error instanceof NetworkClientError) {
    return V1_buildExecutionError(
      V1_ExecutionError.serialization.fromJson(error.payload),
    );
  }
  return error;
};

/**
 * Builds an execution result from the text of an engine response.
 *
 * @param engine engine providing `parseExecutionResults`
 * @param resultInText response body as text
 * @returns the value produced by `V1_buildExecutionResult`
 */
const buildExecutionResultFromText = (engine, resultInText) => {
  // When parsing yields nothing, the raw text itself is handed to the deserializer.
  const rawExecutionResult =
    returnUndefOnError(() =>
      engine.parseExecutionResults(resultInText, undefined),
    ) ?? resultInText;
  return V1_buildExecutionResult(
    V1_deserializeExecutionResult(rawExecutionResult),
  );
};

/**
 * Payload sent to the data-quality execute, plan, debug-plan, property-path-tree
 * and profile endpoints.
 */
export class V1_DQExecuteInput {
  clientVersion;
  model;
  lambdaParameterValues = [];
  packagePath;
  defectsLimit;
  queryLimit;
  // Declared on the class but not part of the serialization schema below.
  allValidationsChecked;
  validationName;
  runQuery;

  static serialization = new SerializationFactory(
    createModelSchema(V1_DQExecuteInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      lambdaParameterValues: customListWithSchema(V1_parameterValueModelSchema),
      packagePath: primitive(),
      defectsLimit: optional(primitive()),
      queryLimit: optional(primitive()),
      validationName: optional(primitive()),
      runQuery: optional(primitive()),
    }),
  );
}

/**
 * Payload built by `exportDataProfiling` and `fetchValidationSuggestions`.
 */
export class V1_DQRuleSuggestionInput {
  clientVersion;
  model;
  lambdaParameterValues = [];
  packagePath;

  static serialization = new SerializationFactory(
    createModelSchema(V1_DQRuleSuggestionInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      lambdaParameterValues: customListWithSchema(V1_parameterValueModelSchema),
      packagePath: primitive(),
    }),
  );
}

/**
 * Payload sent to the data-quality reconciliation endpoint.
 */
export class V1_DQReconciliationInput {
  clientVersion;
  model;
  source;
  target;
  keys = [];
  colsForHash = [];
  defectLimit;
  queryLimit;
  aggregatedHash;
  sourceHashCol;
  targetHashCol;
  includeColumnValues;
  runSourceQuery;
  runTargetQuery;
  sourceLambdaParameterValues = [];
  targetLambdaParameterValues = [];

  static serialization = new SerializationFactory(
    createModelSchema(V1_DQReconciliationInput, {
      clientVersion: optional(primitive()),
      model: V1_pureModelContextPropSchema,
      source: usingModelSchema(V1_rawLambdaModelSchema),
      target: usingModelSchema(V1_rawLambdaModelSchema),
      keys: list(primitive()),
      colsForHash: list(primitive()),
      defectLimit: optional(primitive()),
      queryLimit: optional(primitive()),
      aggregatedHash: optional(primitive()),
      sourceHashCol: optional(primitive()),
      targetHashCol: optional(primitive()),
      includeColumnValues: optional(primitive()),
      runSourceQuery: optional(primitive()),
      runTargetQuery: optional(primitive()),
      sourceLambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
      targetLambdaParameterValues: customListWithSchema(
        V1_parameterValueModelSchema,
      ),
    }),
  );
}

/**
 * V1 implementation of the data-quality graph manager extension. Every
 * operation that talks to the engine requires the graph manager's engine to be
 * a `V1_RemoteEngine`.
 */
export class V1_DSL_Data_Quality_PureGraphManagerExtension extends DSL_DataQuality_PureGraphManagerExtension {
  // Client version used whenever the caller's options do not provide one.
  static DEV_PROTOCOL_VERSION = PureClientVersion.VX_X_X;

  /**
   * @param graphManager must be a `V1_PureGraphManager`, otherwise `guaranteeType` throws
   */
  constructor(graphManager) {
    super(graphManager);
    this.graphManager = guaranteeType(graphManager, V1_PureGraphManager);
  }

  getSupportedProtocolVersion() {
    return PureClientVersion.V1_0_0;
  }

  /**
   * Builds a model context pointer for a `LegendSDLC` graph origin.
   *
   * @param origin graph origin
   * @param clientVersion when truthy, a `V1_Protocol` with this version is attached
   * @returns a `V1_PureModelContextPointer`
   * @throws UnsupportedOperationError when `origin` is not a `LegendSDLC`
   */
  buildPureModelSDLCPointer(origin, clientVersion) {
    if (!(origin instanceof LegendSDLC)) {
      throw new UnsupportedOperationError('Unsupported graph origin');
    }
    const protocol = clientVersion
      ? new V1_Protocol(V1_PureGraphManager.PURE_PROTOCOL_NAME, clientVersion)
      : undefined;
    return new V1_PureModelContextPointer(
      protocol,
      new V1_LegendSDLC(origin.groupId, origin.artifactId, origin.versionId),
    );
  }

  /**
   * Posts an already-serialized execute input to `/dataquality/execute`.
   *
   * @param input serialized `V1_DQExecuteInput`
   * @param options optional `serializationFormat` and `returnAsResponse`
   * @returns the promise returned by `postWithTracing`
   */
  executeValidation = (input, options) => {
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'executeValidation',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_EXECUTE_PLAN),
      `${engineServerClient._pure()}/dataquality/execute`,
      input,
      {},
      undefined,
      {
        serializationFormat: options?.serializationFormat
          ? V1_getEngineSerializationFormat(options.serializationFormat)
          : undefined,
      },
      { enableCompression: true },
      { skipProcessing: Boolean(options?.returnAsResponse) },
    );
  };

  /**
   * Executes the validation and returns the result of calling `text()` on the
   * response.
   *
   * @param input `V1_DQExecuteInput` instance
   */
  async runValidationAndReturnString(input) {
    const response = await this.executeValidation(
      V1_DQExecuteInput.serialization.toJson(input),
      { returnAsResponse: true },
    );
    return response.text();
  }

  /**
   * Executes the validation and returns the non-null response.
   *
   * @param input `V1_DQExecuteInput` instance
   * @param options optional `serializationFormat`
   * @throws an execution error when the engine call fails with a `NetworkClientError`
   */
  async export(input, options) {
    try {
      return guaranteeNonNullable(
        await this.executeValidation(
          V1_DQExecuteInput.serialization.toJson(input),
          {
            serializationFormat: options?.serializationFormat,
            returnAsResponse: true,
          },
        ),
      );
    } catch (error) {
      throw toExecutionError(error);
    }
  }

  /**
   * Populates (and returns) the given execute input from the graph and options.
   *
   * @param graph graph whose origin pointer, or full model data when it has no origin, is sent
   * @param packagePath value stored as the input's `packagePath`
   * @param dqExecuteInput instance to populate; it is mutated in place
   * @param options execution options
   * @returns `dqExecuteInput`
   */
  createExecutionInput(graph, packagePath, dqExecuteInput, options) {
    dqExecuteInput.clientVersion =
      options.clientVersion ??
      V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
    dqExecuteInput.model = graph.origin
      ? this.buildPureModelSDLCPointer(graph.origin, undefined)
      : this.graphManager.getFullGraphModelData(graph);
    dqExecuteInput.lambdaParameterValues = options.lambdaParameterValues
      ? options.lambdaParameterValues.map(V1_transformParameterValue)
      : [];
    dqExecuteInput.packagePath = packagePath;
    dqExecuteInput.defectsLimit = options.previewLimit;
    dqExecuteInput.runQuery = options.runQuery;
    // The query limit is only forwarded when the query itself is being run.
    if (options.runQuery) {
      dqExecuteInput.queryLimit = options.queryLimit;
    }
    // A validation name is only forwarded when not all validations are checked.
    if (!options.allValidationsChecked) {
      dqExecuteInput.validationName = options.validationName;
    }
    return dqExecuteInput;
  }

  /**
   * Builds the input shared by `exportDataProfiling` and
   * `fetchValidationSuggestions`.
   *
   * @param graph graph whose origin pointer, or full model data when it has no origin, is sent
   * @param packagePath value stored as the input's `packagePath`
   * @param options supplies `clientVersion` and `lambdaParameterValues`
   * @returns a populated `V1_DQRuleSuggestionInput`
   */
  createRuleSuggestionInput(graph, packagePath, options) {
    const input = new V1_DQRuleSuggestionInput();
    input.packagePath = packagePath;
    input.clientVersion =
      options.clientVersion ??
      V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
    input.model = graph.origin
      ? this.buildPureModelSDLCPointer(graph.origin, undefined)
      : this.graphManager.getFullGraphModelData(graph);
    input.lambdaParameterValues = options.lambdaParameterValues
      ? options.lambdaParameterValues.map(V1_transformParameterValue)
      : [];
    return input;
  }

  /**
   * Posts the execute input to `/dataquality/generatePlan`.
   */
  generatePlan = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'generatePlan',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_GENERATE_EXECUTION_PLAN),
      `${engineServerClient._pure()}/dataquality/generatePlan`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
  };

  /**
   * Runs the validation and builds an execution result from the response text.
   *
   * @throws an execution error when the engine call fails with a `NetworkClientError`
   */
  execute = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    try {
      const validationResultInText =
        await this.runValidationAndReturnString(input);
      return buildExecutionResultFromText(
        this.graphManager.engine,
        validationResultInText,
      );
    } catch (error) {
      throw toExecutionError(error);
    }
  };

  /**
   * Builds the execute input and delegates to `export`.
   */
  exportData = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    return this.export(input, options);
  };

  /**
   * Posts the execute input to `/dataquality/debugPlan`.
   *
   * @returns `{ plan, debug }` where `debug` is the response's `debug` entries joined with newlines
   */
  debugExecutionPlanGeneration = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'debugExecutionPlanGeneration',
    );
    const result = await engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_DEBUG_EXECUTION_PLAN),
      `${engineServerClient._pure()}/dataquality/debugPlan`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
    return {
      plan: result.plan,
      debug: result.debug.join('\n'),
    };
  };

  /**
   * Posts the execute input to `/dataquality/propertyPathTree` and builds a
   * data-quality graph fetch tree from the response.
   */
  fetchStructuralValidations = async (graph, packagePath, options) => {
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'fetchStructuralValidations',
    );
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    const serializedInput = V1_DQExecuteInput.serialization.toJson(input);
    const V1_rootGraphFetchTree = await engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_FETCH_PROPERTY_PATH_TREE),
      `${engineServerClient._pure()}/dataquality/propertyPathTree`,
      serializedInput,
      {},
      undefined,
      undefined,
      { enableCompression: true },
    );
    const V1_dataQualityRootGraphFetchTree =
      V1_transformRootGraphFetchTreeToDataQualityRootGraphFetchTree(
        V1_rootGraphFetchTree,
      );
    const context = new V1_GraphBuilderContextBuilder(
      graph,
      graph,
      this.graphManager.graphBuilderExtensions,
      this.graphManager.logService,
    ).build();
    return V1_buildDataQualityGraphFetchTree(
      V1_dataQualityRootGraphFetchTree,
      context,
      undefined,
      [],
      new V1_ProcessingContext(''),
      true,
    );
  };

  /**
   * Posts an already-serialized input to `/dataquality/profile`.
   *
   * @param input serialized payload
   * @param options optional `serializationFormat` and `returnAsResponse`
   * @returns the promise returned by `postWithTracing`
   */
  executeDataProfiling = (input, options) => {
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'executeDataProfiling',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_EXECUTE_DATA_PROFILING),
      `${engineServerClient._pure()}/dataquality/profile`,
      input,
      {},
      undefined,
      {
        serializationFormat: options?.serializationFormat
          ? V1_getEngineSerializationFormat(options.serializationFormat)
          : undefined,
      },
      { enableCompression: true },
      { skipProcessing: Boolean(options?.returnAsResponse) },
    );
  };

  /**
   * Runs data profiling and builds an execution result from the response text.
   *
   * @throws an execution error when the engine call fails with a `NetworkClientError`
   */
  runDataProfiling = async (graph, packagePath, options) => {
    const input = this.createExecutionInput(
      graph,
      packagePath,
      new V1_DQExecuteInput(),
      options,
    );
    try {
      const profilingResult = await this.executeDataProfiling(
        V1_DQExecuteInput.serialization.toJson(input),
        { returnAsResponse: true },
      );
      const profilingResultInText = await profilingResult.text();
      return buildExecutionResultFromText(
        this.graphManager.engine,
        profilingResultInText,
      );
    } catch (error) {
      throw toExecutionError(error);
    }
  };

  /**
   * Runs data profiling with a rule-suggestion input and returns the non-null
   * response.
   *
   * @throws an execution error when the engine call fails with a `NetworkClientError`
   */
  exportDataProfiling = async (graph, packagePath, options) => {
    const input = this.createRuleSuggestionInput(graph, packagePath, options);
    try {
      return guaranteeNonNullable(
        await this.executeDataProfiling(
          // Serialize with the schema of the class that was actually instantiated.
          V1_DQRuleSuggestionInput.serialization.toJson(input),
          {
            serializationFormat: options.serializationFormat,
            returnAsResponse: true,
          },
        ),
      );
    } catch (error) {
      throw toExecutionError(error);
    }
  };

  /**
   * Posts a rule-suggestion input to `/dataquality/ruleSuggestions`.
   */
  fetchValidationSuggestions = async (graph, packagePath, options) => {
    const input = this.createRuleSuggestionInput(graph, packagePath, options);
    const engineServerClient = getRemoteEngineServerClient(
      this.graphManager,
      'fetchValidationSuggestions',
    );
    return engineServerClient.postWithTracing(
      engineServerClient.getTraceData(DQ_FETCH_RULE_SUGGESTIONS),
      `${engineServerClient._pure()}/dataquality/ruleSuggestions`,
      // Serialize with the schema of the class that was actually instantiated.
      V1_DQRuleSuggestionInput.serialization.toJson(input),
      {},
      undefined,
      {},
      { enableCompression: true },
      {},
    );
  };

  /**
   * Transforms a raw lambda using a transformer context built from the plugin
   * manager's Pure protocol processor plugins.
   */
  rawLambdaToV1(lambda) {
    const context = new V1_GraphTransformerContextBuilder(
      this.graphManager.pluginManager.getPureProtocolProcessorPlugins(),
    ).build();
    return V1_transformRawLambda(lambda, context);
  }

  /**
   * Builds the reconciliation input from the graph and options.
   *
   * `options.limit` is stored as `queryLimit` when either the source or the
   * target query is being run, and as `defectLimit` otherwise.
   *
   * @param graph graph whose origin pointer, or full model data when it has no origin, is sent
   * @param options reconciliation options
   * @returns a populated `V1_DQReconciliationInput`
   */
  createReconciliationInput(graph, options) {
    const input = new V1_DQReconciliationInput();
    input.clientVersion =
      options.clientVersion ??
      V1_DSL_Data_Quality_PureGraphManagerExtension.DEV_PROTOCOL_VERSION;
    input.model = graph.origin
      ? this.buildPureModelSDLCPointer(graph.origin, undefined)
      : this.graphManager.getFullGraphModelData(graph);
    input.source = this.rawLambdaToV1(options.source);
    input.target = this.rawLambdaToV1(options.target);
    input.keys = options.keys;
    input.colsForHash = options.colsForHash;
    input.aggregatedHash = options.aggregatedHash;
    input.sourceHashCol = options.sourceHashCol;
    input.targetHashCol = options.targetHashCol;
    input.includeColumnValues = options.includeColumnValues;
    input.runSourceQuery = options.runSourceQuery;
    input.runTargetQuery = options.runTargetQuery;
    // `||` rather than `??`: an explicit `runSourceQuery: false` must not hide
    // a truthy `runTargetQuery`.
    const runningSourceOrTargetQuery =
      options.runSourceQuery || options.runTargetQuery;
    if (runningSourceOrTargetQuery) {
      input.queryLimit = options.limit;
    } else {
      input.defectLimit = options.limit;
    }
    if (options.sourceLambdaParameterValues) {
      input.sourceLambdaParameterValues =
        options.sourceLambdaParameterValues.map(V1_transformParameterValue);
    }
    if (options.targetLambdaParameterValues) {
      input.targetLambdaParameterValues =
        options.targetLambdaParameterValues.map(V1_transformParameterValue);
    }
    return input;
  }

  /**
   * Runs the reconciliation with the options as given.
   */
  runReconciliation = async (graph, options) => {
    const input = this.createReconciliationInput(graph, options);
    return this.runReconciliationWithInput(input);
  };

  /**
   * Runs the reconciliation with `runSourceQuery` forced to `true` and
   * `runTargetQuery` cleared.
   */
  runReconciliationSourceQuery = async (graph, options) => {
    const input = this.createReconciliationInput(graph, {
      ...options,
      runSourceQuery: true,
      runTargetQuery: undefined,
    });
    return this.runReconciliationWithInput(input);
  };

  /**
   * Runs the reconciliation with `runTargetQuery` forced to `true` and
   * `runSourceQuery` cleared.
   */
  runReconciliationTargetQuery = async (graph, options) => {
    const input = this.createReconciliationInput(graph, {
      ...options,
      runSourceQuery: undefined,
      runTargetQuery: true,
    });
    return this.runReconciliationWithInput(input);
  };

  /**
   * Posts the reconciliation input to `/dataquality/reconciliation` and builds
   * an execution result from the response text.
   *
   * @param input `V1_DQReconciliationInput` instance
   * @throws an execution error when the engine call fails with a `NetworkClientError`
   */
  async runReconciliationWithInput(input) {
    try {
      const engineServerClient = getRemoteEngineServerClient(
        this.graphManager,
        'runReconciliation',
      );
      const result = await engineServerClient.postWithTracing(
        engineServerClient.getTraceData(DQ_EXECUTE_RECONCILIATION),
        `${engineServerClient._pure()}/dataquality/reconciliation`,
        V1_DQReconciliationInput.serialization.toJson(input),
        {},
        undefined,
        undefined,
        { enableCompression: true },
        { skipProcessing: true },
      );
      const resultInText = await result.text();
      return buildExecutionResultFromText(
        this.graphManager.engine,
        resultInText,
      );
    } catch (error) {
      throw toExecutionError(error);
    }
  }
}
//# sourceMappingURL=V1_DSL_Data_Quality_PureGraphManagerExtension.js.map