hana-cli
Version:
HANA Developer Command Line Interface
593 lines (521 loc) • 15.9 kB
JavaScript
// @ts-check
import { readFileSync } from 'node:fs'
import * as baseLite from '../utils/base-lite.js'
import dbClientClass from "../utils/database/index.js"
import { buildDocEpilogue } from '../utils/doc-linker.js'
export const command = 'dataValidator'
export const aliases = ['dval', 'validateData', 'dataValidation']
export const describe = baseLite.bundle.getText("dataValidator")
/**
 * Builder for the dataValidator command line options.
 * Passes the option definitions below through baseLite.getBuilder and registers
 * them on the yargs instance, then adds a usage example and a documentation epilogue.
 * Note: wrap(160) is invoked twice in the chain (before and after the example).
 * @param {object} yargs - yargs instance supplied by the CLI framework
 * @returns {object} the value returned by the chained yargs calls
 */
export const builder = (yargs) => yargs.options(baseLite.getBuilder({
// Table whose rows are validated
table: {
alias: ['t'],
type: 'string',
desc: baseLite.bundle.getText("dataValidatorTable")
},
// Schema of the table; the default is a placeholder resolved at run time by dataValidatorMain
schema: {
alias: ['s'],
type: 'string',
default: '**CURRENT_SCHEMA**',
desc: baseLite.bundle.getText("dataValidatorSchema")
},
// Inline validation rules string
rules: {
alias: ['r'],
type: 'string',
desc: baseLite.bundle.getText("dataValidatorRules")
},
// Path to a file containing validation rules
rulesFile: {
alias: ['rf'],
type: 'string',
desc: baseLite.bundle.getText("dataValidatorRulesFile")
},
// NOTE(review): this option is declared here but not read anywhere in the visible code
columns: {
alias: ['c'],
type: 'string',
desc: baseLite.bundle.getText("dataValidatorColumns")
},
// Output file path; when omitted the results are printed to the console
output: {
alias: ['o'],
type: 'string',
desc: baseLite.bundle.getText("dataValidatorOutput")
},
// Result format
format: {
alias: ['f'],
choices: ["json", "csv", "summary", "detailed"],
default: "json",
type: 'string',
desc: baseLite.bundle.getText("dataValidatorFormat")
},
// Maximum number of rows fetched for validation (applied as a SQL LIMIT when > 0)
limit: {
alias: ['l'],
type: 'number',
default: 10000,
desc: baseLite.bundle.getText("dataValidatorLimit")
},
// Stop validating as soon as the first error is found
stopOnFirstError: {
alias: ['sfe'],
type: 'boolean',
default: false,
desc: baseLite.bundle.getText("dataValidatorStopOnFirstError")
},
// Operation timeout; dataValidatorMain multiplies it by 1000 for setTimeout, i.e. it is in seconds
timeout: {
alias: ['to'],
type: 'number',
default: 3600,
desc: baseLite.bundle.getText("dataValidatorTimeout")
},
// Connection profile passed through to the database client factory
profile: {
alias: ['p'],
type: 'string',
desc: baseLite.bundle.getText("profile")
}
})).wrap(160).example('hana-cli dataValidator --table myTable --rules validation.json', baseLite.bundle.getText("dataValidatorExample")).wrap(160).epilog(buildDocEpilogue('dataValidator', 'data-tools', ['import', 'dataProfile']))
/**
 * Prompt schema handed to base.promptHandler by the command handler.
 * Only `table` is marked required. Every optional entry except `schema` carries an
 * `ask` callback; all of them return false apart from `profile`, whose callback
 * returns undefined.
 * NOTE(review): how promptHandler interprets `ask` is not visible here - presumably a
 * falsy result suppresses the interactive prompt; confirm against utils/base.js.
 * Unlike the builder options, there are no entries here for `rulesFile` or
 * `stopOnFirstError`.
 */
export let inputPrompts = {
table: {
description: baseLite.bundle.getText("dataValidatorTable"),
type: 'string',
required: true
},
schema: {
description: baseLite.bundle.getText("dataValidatorSchema"),
type: 'string',
required: false
},
rules: {
description: baseLite.bundle.getText("dataValidatorRules"),
type: 'string',
required: false,
ask: () => false
},
columns: {
description: baseLite.bundle.getText("dataValidatorColumns"),
type: 'string',
required: false,
ask: () => false
},
output: {
description: baseLite.bundle.getText("dataValidatorOutput"),
type: 'string',
required: false,
ask: () => false
},
format: {
description: baseLite.bundle.getText("dataValidatorFormat"),
type: 'string',
required: false,
ask: () => false
},
limit: {
description: baseLite.bundle.getText("dataValidatorLimit"),
type: 'number',
required: false,
default: 10000,
ask: () => false
},
timeout: {
description: baseLite.bundle.getText("dataValidatorTimeout"),
type: 'number',
required: false,
default: 3600,
ask: () => false
},
profile: {
description: baseLite.bundle.getText("profile"),
type: 'string',
required: false,
// Empty body: this callback returns undefined rather than false
ask: () => { }
}
}
/**
 * yargs entry point for the dataValidator command.
 * Lazily loads the full base utilities and hands control to its prompt handler,
 * which is given dataValidatorMain as the callback and inputPrompts as the schema.
 * @param {object} argv - Command line arguments from yargs
 * @returns {Promise<void>}
 */
export async function handler(argv) {
    // Loaded on demand so the heavy base module is only pulled in when the command runs
    const baseModule = await import('../utils/base.js')
    await baseModule.promptHandler(argv, dataValidatorMain, inputPrompts, true, false)
}
/**
 * Validate the rows of a table against business rules and report the outcome.
 *
 * Flow: arm the optional timeout, connect, resolve the schema, read the table's
 * columns, build the rule list (falling back to a name-based default preset),
 * fetch up to `prompts.limit` rows, validate them, then write the results to
 * `prompts.output` or print them to the console.
 *
 * On any failure the error is logged and the process exits with code 1. The
 * timeout timer is always cleared and the database client is disconnected
 * exactly once, whether the run succeeded or failed.
 *
 * @param {object} prompts - User prompts (table, schema, rules, rulesFile, output,
 *   format, limit, stopOnFirstError, timeout, ...)
 * @returns {Promise<void>}
 */
export async function dataValidatorMain(prompts) {
    const base = await import('../utils/base.js')
    base.debug('dataValidatorMain')

    let dbClient = null
    let timeoutHandle = null
    let failed = false

    try {
        base.setPrompts(prompts)

        // Set operation timeout (prompts.timeout is in seconds); say why we exit
        // instead of terminating silently
        if (prompts.timeout > 0) {
            timeoutHandle = setTimeout(() => {
                console.error(`Data validation error: operation timed out after ${prompts.timeout} seconds`)
                process.exit(1)
            }, prompts.timeout * 1000)
        }

        // Connect to database
        dbClient = await dbClientClass.getNewClient(prompts)
        await dbClient.connect()
        const dbKind = (dbClient.getKind() || 'hana').toLowerCase()

        // Resolve the schema. The **CURRENT_SCHEMA** placeholder must never reach the
        // SQL text: sqlite has no schema to look up, so the table is left unqualified.
        let schema = prompts.schema
        if (!schema || schema === '**CURRENT_SCHEMA**') {
            schema = dbKind === 'sqlite' ? null : await getCurrentSchema(dbClient, dbKind)
        }

        const table = prompts.table
        console.log(`Starting data validation for table: ${table}`)

        // Get table columns
        const tableColumns = await getTableColumns(dbClient, schema, table, dbKind)

        // Parse validation rules (use default preset when none provided)
        let rulesInput = prompts.rules
        if (!rulesInput && !prompts.rulesFile) {
            rulesInput = buildDefaultRulesString(tableColumns)
            if (rulesInput) {
                console.log(`No rules provided. Using default rules preset: ${rulesInput}`)
            }
        }
        const rules = parseValidationRules(rulesInput, prompts.rulesFile)
        if (rules.length === 0) {
            throw new Error(baseLite.bundle.getText("error.noValidationRules"))
        }

        // Get data to validate. Only a finite, positive whole number is ever
        // interpolated into the LIMIT clause.
        let query = `SELECT * FROM ${formatQualifiedName(schema, table)}`
        const rowLimit = Math.trunc(Number(prompts.limit))
        if (Number.isFinite(rowLimit) && rowLimit > 0) {
            query += ` LIMIT ${rowLimit}`
        }
        const rows = await dbClient.execSQL(query)

        // Validate data
        const validationResults = validateData(rows, rules, tableColumns, prompts.stopOnFirstError)

        // Output results
        if (prompts.output) {
            await outputValidationResults(prompts.output, validationResults, prompts.format)
        } else {
            displayValidationResults(validationResults, prompts.format)
        }
        console.log(`Data validation complete. Total rows: ${validationResults.totalRows}, Valid: ${validationResults.validRows}, Invalid: ${validationResults.invalidRows}, Errors: ${validationResults.totalErrors}`)
    } catch (error) {
        console.error(`Data validation error: ${error.message}`)
        base.debug(error)
        failed = true
    } finally {
        // Single cleanup path: runs before any exit so the timer and the connection
        // are released once, on success and on failure alike
        if (timeoutHandle) clearTimeout(timeoutHandle)
        if (dbClient) {
            try {
                await dbClient.disconnect()
            } catch (disconnectError) {
                // Best effort: a failed disconnect must not mask the real outcome
                base.debug(disconnectError)
            }
        }
    }

    // Exit only after cleanup has completed
    if (failed) {
        process.exit(1)
    }
}
/**
 * Look up the session's current schema.
 * HANA and PostgreSQL are queried; any other database kind yields 'public'
 * without touching the connection.
 * @param {object} dbClient - Database client exposing execSQL(sql)
 * @param {string} dbKind - Database kind (lower case)
 * @returns {Promise<string>} schema name, or the dialect default when the query returns nothing usable
 */
async function getCurrentSchema(dbClient, dbKind) {
    switch (dbKind) {
        case 'hana': {
            const rows = await dbClient.execSQL('SELECT CURRENT_SCHEMA FROM DUMMY')
            const found = rows[0]?.CURRENT_SCHEMA
            return found || 'PUBLIC'
        }
        case 'postgres': {
            const rows = await dbClient.execSQL('SELECT current_schema()')
            const found = rows[0]?.current_schema
            return found || 'public'
        }
        default:
            return 'public'
    }
}
/**
 * Read the ordered column names of a table from the database catalog.
 * HANA is looked up in SYS.TABLE_COLUMNS with an upper-cased table name,
 * PostgreSQL in information_schema.columns with a lower-cased one. Any other
 * database kind returns an empty list without issuing a query.
 * @param {object} dbClient - Database client exposing execSQL(sql, params)
 * @param {string|null} schema - Schema name; falls back to 'PUBLIC' / 'public' when falsy
 * @param {string} table - Table name
 * @param {string} dbKind - Database kind (lower case)
 * @returns {Promise<Array<string>>}
 */
async function getTableColumns(dbClient, schema, table, dbKind) {
    if (dbKind === 'hana') {
        const params = [schema || 'PUBLIC', table.toUpperCase()]
        const rows = await dbClient.execSQL(`SELECT COLUMN_NAME FROM SYS.TABLE_COLUMNS
WHERE SCHEMA_NAME = ? AND TABLE_NAME = ?
ORDER BY POSITION`, params)
        return rows.map(({ COLUMN_NAME }) => COLUMN_NAME)
    }

    if (dbKind === 'postgres') {
        const params = [schema || 'public', table.toLowerCase()]
        const rows = await dbClient.execSQL(`SELECT column_name FROM information_schema.columns
WHERE table_schema = ? AND table_name = ?
ORDER BY ordinal_position`, params)
        return rows.map(({ column_name }) => column_name)
    }

    // No catalog lookup is implemented for other database kinds
    return []
}
/**
 * Parse validation rules from an inline string or a rules file.
 *
 * Grammar: `column:rule1,rule2;column2:rule3`. Column entries are separated by
 * `;` or by line breaks, so a rules file may list one column per line.
 * Supported rules: required, numeric, email, date, length:min:max,
 * pattern:regex, range:min:max.
 *
 * Only the FIRST `:` of an entry separates the column from its rules, so
 * parameterised rules such as `length:1:10` keep their arguments.
 * Limitation of the grammar: a `pattern` regex cannot contain `,`, `;` or a
 * line break, because those characters are separators.
 *
 * When `rulesFile` is given and readable its content replaces `rulesStr`; when
 * it cannot be read a warning is printed and `rulesStr` is used instead.
 *
 * @param {string} rulesStr - Rules string
 * @param {string} rulesFile - Rules file path
 * @returns {Array<object>} list of `{ column, rule }` pairs, one per rule
 */
function parseValidationRules(rulesStr, rulesFile) {
    let ruleContent = rulesStr || ''

    if (rulesFile) {
        try {
            // ES modules have no `require`; readFileSync comes from the module-level import
            ruleContent = readFileSync(rulesFile, 'utf-8')
        } catch (err) {
            console.warn(`Could not read rules file: ${err.message}`)
        }
    }

    if (!ruleContent) {
        return []
    }

    const rules = []
    for (const part of ruleContent.split(/[;\r\n]+/)) {
        const separatorIdx = part.indexOf(':')
        if (separatorIdx === -1) continue

        const column = part.slice(0, separatorIdx).trim()
        const ruleStr = part.slice(separatorIdx + 1)
        if (!column || !ruleStr.trim()) continue

        for (const rule of ruleStr.split(',')) {
            const trimmedRule = rule.trim()
            // Skip empty names produced by stray commas ("a:required,")
            if (trimmedRule) {
                rules.push({ column, rule: trimmedRule })
            }
        }
    }
    return rules
}
/**
 * Derive a default rules string from column names alone.
 * Each column name is upper-cased and matched against naming heuristics:
 * an ID suffix maps to `required`, a name containing EMAIL to `email`, a
 * DATE/_AT/_ON suffix to `date`, and an amount/count style suffix to `numeric`.
 * When no column matches anything, the first column is marked `required`.
 * @param {Array<string>} columns - Table columns
 * @returns {string} rules in `column:rule1,rule2;column2:rule3` form, or '' when there are no columns
 */
function buildDefaultRulesString(columns) {
    if (!Array.isArray(columns) || columns.length === 0) {
        return ''
    }

    // Evaluated in this order, which is also the order of the rules in the output
    const heuristics = [
        ['required', (name) => /(^|_)ID$/.test(name)],
        ['email', (name) => name.includes('EMAIL')],
        ['date', (name) => /(DATE|_AT|_ON)$/.test(name)],
        ['numeric', (name) => /(AMOUNT|PRICE|TOTAL|COUNT|QTY|QUANTITY)$/.test(name)]
    ]

    const rulesByColumn = new Map()
    for (const column of columns) {
        const normalized = String(column).toUpperCase()
        for (const [ruleName, applies] of heuristics) {
            if (!applies(normalized)) continue
            const collected = rulesByColumn.get(column) ?? new Set()
            collected.add(ruleName)
            rulesByColumn.set(column, collected)
        }
    }

    // Guarantee at least one rule so validation has something to check
    if (rulesByColumn.size === 0) {
        rulesByColumn.set(columns[0], new Set(['required']))
    }

    const segments = []
    for (const [column, ruleNames] of rulesByColumn) {
        segments.push(`${column}:${[...ruleNames].join(',')}`)
    }
    return segments.join(';')
}
/**
 * Run every rule against every row and collect the failures.
 * A row counts as invalid when at least one rule fails for it. With
 * `stopOnFirstError` the scan ends right after the first failing rule, so
 * `totalRows` may exceed `validRows + invalidRows`.
 * @param {Array<object>} rows - Data rows, keyed by column name
 * @param {Array<object>} rules - Validation rules as `{ column, rule }` pairs
 * @param {Array<string>} columns - Table columns (not consulted by this function)
 * @param {boolean} stopOnFirstError - Stop on first error
 * @returns {object} `{ totalRows, validRows, invalidRows, totalErrors, errors }`
 */
function validateData(rows, rules, columns, stopOnFirstError) {
    const summary = {
        totalRows: rows.length,
        validRows: 0,
        invalidRows: 0,
        totalErrors: 0,
        errors: []
    }

    let index = 0
    while (index < rows.length) {
        const record = rows[index]
        index += 1 // doubles as the 1-based row number reported to the user
        let failures = 0

        for (const { column, rule } of rules) {
            const cellValue = record[column]
            const message = validateValue(cellValue, rule, column)
            if (!message) continue

            failures += 1
            summary.totalErrors += 1
            summary.errors.push({
                rowNumber: index,
                column,
                value: cellValue,
                rule,
                error: message
            })
            if (stopOnFirstError) break
        }

        if (failures === 0) {
            summary.validRows += 1
        } else {
            summary.invalidRows += 1
        }

        if (stopOnFirstError && summary.totalErrors > 0) break
    }

    return summary
}
/**
 * Validate a single value against a rule.
 *
 * Rule syntax is `name[:arg[:arg]]`. Apart from `required`, every rule skips
 * blank values (null, undefined, '') so that optionality is decided by the
 * `required` rule alone; `length` only skips null/undefined because an empty
 * string has a meaningful length of 0.
 *
 * - required          value must not be null, undefined or ''
 * - numeric           value must convert to a number
 * - email             value must look like local@domain.tld
 * - date              value must be parseable by Date.parse
 * - length:min:max    string length must be within [min, max]
 * - pattern:regex     value must match the regex (the regex may contain ':')
 * - range:min:max     numeric value must be within [min, max]; non-numeric values fail
 *
 * Unknown rule names, and rules with too few arguments, are ignored.
 *
 * @param {*} value - Value to validate
 * @param {string} rule - Validation rule
 * @param {string} column - Column name
 * @returns {string|null} error message, or null when the value passes
 */
function validateValue(value, rule, column) {
    const [ruleName, ...ruleArgs] = rule.split(':')
    const isMissing = value === null || value === undefined
    const isBlank = isMissing || value === ''

    switch (ruleName) {
        case 'required':
            if (isBlank) {
                return `Column ${column} is required`
            }
            break

        case 'numeric':
            if (!isBlank && Number.isNaN(Number(value))) {
                return `Column ${column} must be numeric, got: ${value}`
            }
            break

        case 'email':
            if (!isBlank && !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(String(value))) {
                return `Column ${column} must be valid email`
            }
            break

        case 'date':
            if (!isBlank && Number.isNaN(Date.parse(String(value)))) {
                return `Column ${column} must be valid date`
            }
            break

        case 'length':
            if (ruleArgs.length >= 2 && !isMissing) {
                const minLen = Number.parseInt(ruleArgs[0], 10)
                const maxLen = Number.parseInt(ruleArgs[1], 10)
                const len = String(value).length
                if (len < minLen || len > maxLen) {
                    return `Column ${column} length must be between ${minLen} and ${maxLen}`
                }
            }
            break

        case 'pattern':
            if (ruleArgs.length >= 1 && !isBlank) {
                // Re-join so a regex that itself contains ':' is not truncated
                const pattern = ruleArgs.join(':')
                let matcher
                try {
                    matcher = new RegExp(pattern)
                } catch (err) {
                    // Report a bad rule as a validation error instead of aborting the run
                    return `Column ${column} has invalid pattern: ${pattern}`
                }
                if (!matcher.test(String(value))) {
                    return `Column ${column} does not match pattern: ${pattern}`
                }
            }
            break

        case 'range':
            if (ruleArgs.length >= 2 && !isBlank) {
                const min = Number(ruleArgs[0])
                const max = Number(ruleArgs[1])
                const val = Number(value)
                // NaN compares false against everything, so test for it explicitly
                if (Number.isNaN(val) || val < min || val > max) {
                    return `Column ${column} must be between ${min} and ${max}`
                }
            }
            break
    }
    return null
}
/**
 * Format a (schema-qualified) table name as quoted SQL identifiers.
 * Each part is wrapped in double quotes and any embedded double quote is
 * doubled, so a user-supplied name cannot terminate the identifier early and
 * inject SQL into the statement built around it.
 * @param {string|null} schema - Schema name; when falsy only the table is emitted
 * @param {string} table - Table name
 * @returns {string} `"schema"."table"` or `"table"`
 */
function formatQualifiedName(schema, table) {
    const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`

    if (schema) {
        return `${quoteIdentifier(schema)}.${quoteIdentifier(table)}`
    }
    return quoteIdentifier(table)
}
/**
 * Write validation results to a file.
 *
 * Formats:
 * - `json` and `detailed`: the full results object as pretty-printed JSON
 *   (the same mapping the console output uses for `detailed`)
 * - `csv`: one line per error under a `Row,Column,Value,Rule,Error` header;
 *   every text field is quoted with embedded double quotes doubled, because
 *   rule definitions and error messages can contain quotes just like values
 * - anything else: the plain-text summary report
 *
 * @param {string} filePath - Output file path
 * @param {object} results - Validation results
 * @param {string} format - Output format
 * @returns {Promise<void>}
 */
async function outputValidationResults(filePath, results, format) {
    const fsModule = await import('fs')
    const fs = fsModule.promises

    // Quote one CSV field, doubling embedded quotes
    const csvField = (field) => `"${String(field).replace(/"/g, '""')}"`

    let content
    if (format === 'json' || format === 'detailed') {
        content = JSON.stringify(results, null, 2)
    } else if (format === 'csv') {
        const lines = ['Row,Column,Value,Rule,Error']
        for (const error of results.errors) {
            lines.push([
                error.rowNumber,
                csvField(error.column),
                csvField(error.value),
                csvField(error.rule),
                csvField(error.error)
            ].join(','))
        }
        content = `${lines.join('\n')}\n`
    } else {
        content = formatSummaryReport(results)
    }

    await fs.writeFile(filePath, content)
}
/**
 * Render validation results as a plain-text report.
 * The report starts with the four counters; when there are errors it lists at
 * most the first 100 of them and adds a trailing line with the number omitted.
 * @param {object} results - Validation results
 * @returns {string} report text, terminated by a newline
 */
function formatSummaryReport(results) {
    const MAX_LISTED = 100
    const { errors } = results

    const lines = [
        'Data Validation Report',
        '=======================',
        '',
        `Total Rows: ${results.totalRows}`,
        `Valid Rows: ${results.validRows}`,
        `Invalid Rows: ${results.invalidRows}`,
        `Total Errors: ${results.totalErrors}`,
        ''
    ]

    if (errors.length > 0) {
        lines.push('Errors:')
        errors
            .slice(0, MAX_LISTED)
            .forEach((entry) => lines.push(` Row ${entry.rowNumber}, Column ${entry.column}: ${entry.error}`))

        const omitted = errors.length - MAX_LISTED
        if (omitted > 0) {
            lines.push(` ... and ${omitted} more errors`)
        }
    }

    // Every line, including the last one, ends with a newline
    return `${lines.join('\n')}\n`
}
/**
 * Display validation results in the console.
 *
 * Formats:
 * - `json` and `detailed`: the full results object as pretty-printed JSON
 * - `csv`: one line per error under a `Row,Column,Value,Rule,Error` header;
 *   every text field is quoted with embedded double quotes doubled, because
 *   rule definitions and error messages can contain quotes just like values
 * - anything else: the plain-text summary report
 *
 * @param {object} results - Validation results
 * @param {string} format - Display format
 * @returns {void}
 */
function displayValidationResults(results, format) {
    if (format === 'detailed' || format === 'json') {
        console.log(JSON.stringify(results, null, 2))
        return
    }

    if (format === 'csv') {
        // Quote one CSV field, doubling embedded quotes
        const csvField = (field) => `"${String(field).replace(/"/g, '""')}"`

        console.log('Row,Column,Value,Rule,Error')
        for (const error of results.errors) {
            console.log([
                error.rowNumber,
                csvField(error.column),
                csvField(error.value),
                csvField(error.rule),
                csvField(error.error)
            ].join(','))
        }
        return
    }

    console.log(formatSummaryReport(results))
}