// hana-cli
// Version:
// HANA Developer Command Line Interface
// 511 lines (462 loc) • 14.3 kB
// JavaScript
// @ts-check
import * as baseLite from '../utils/base-lite.js'
import dbClientClass from "../utils/database/index.js"
import { buildDocEpilogue } from '../utils/doc-linker.js'
// Command metadata exported alongside `builder` and `handler` below.
// Primary name under which the command is invoked.
export const command = 'dataDiff'
// Alternative names accepted for the same command.
export const aliases = ['ddiff', 'diffData', 'dataCompare']
// Help description, looked up in the text bundle under the "dataDiff" key.
export const describe = baseLite.bundle.getText("dataDiff")
/**
 * Yargs builder for the dataDiff command.
 * Declares the command-specific options (passed through baseLite.getBuilder),
 * sets the help wrap width and attaches a usage example plus a documentation epilogue.
 * @param {object} yargs - Yargs instance to configure
 * @returns {object} Whatever the chained yargs configuration calls return
 */
export const builder = (yargs) => {
    // Shorthand for a text-bundle lookup by key
    const text = (key) => baseLite.bundle.getText(key)

    const commandOptions = baseLite.getBuilder({
        // Tables and schemas to compare
        table1: { alias: ['t1'], type: 'string', desc: text("dataDiffTable1") },
        table2: { alias: ['t2'], type: 'string', desc: text("dataDiffTable2") },
        schema1: { alias: ['s1'], type: 'string', default: '**CURRENT_SCHEMA**', desc: text("dataDiffSchema1") },
        schema2: { alias: ['s2'], type: 'string', default: '**CURRENT_SCHEMA**', desc: text("dataDiffSchema2") },
        // Comma-separated column lists
        keyColumns: { alias: ['k'], type: 'string', desc: text("dataDiffKeyColumns") },
        compareColumns: { alias: ['c'], type: 'string', desc: text("dataDiffCompareColumns") },
        // Result presentation
        output: { alias: ['o'], type: 'string', desc: text("dataDiffOutput") },
        format: {
            alias: ['f'],
            choices: ["json", "csv", "summary"],
            default: "summary",
            type: 'string',
            desc: text("dataDiffFormat")
        },
        limit: { alias: ['l'], type: 'number', default: 10000, desc: text("dataDiffLimit") },
        showValues: { alias: ['sv'], type: 'boolean', default: false, desc: text("dataDiffShowValues") },
        // Execution control
        dryRun: { alias: ['dr', 'preview'], type: 'boolean', default: false, desc: text("dryRun") },
        timeout: { alias: ['to'], type: 'number', default: 3600, desc: text("dataDiffTimeout") },
        profile: { alias: ['p'], type: 'string', desc: text("profile") }
    })

    return yargs
        .options(commandOptions)
        .wrap(160)
        .example('hana-cli dataDiff --table1 source_data --table2 target_data', text("dataDiffExample"))
        .wrap(160)
        .epilog(buildDocEpilogue('dataDiff', 'data-tools', ['compareData', 'dataValidator']))
}
/**
 * Builds the prompt definitions for the dataDiff command.
 * Each entry carries a description from the text bundle, a value type and a
 * `required` flag; most optional entries also carry an `ask` callback that returns false.
 * NOTE(review): presumably the prompt handler uses `ask` to decide whether to prompt
 * interactively - confirm against promptHandler in ../utils/base.js.
 * @returns {object} Prompt definitions keyed by option name
 */
const buildInputPrompts = () => {
    // Shorthand for a text-bundle lookup by key
    const text = (key) => baseLite.bundle.getText(key)
    // Shared `ask` callback for entries that always answer false
    const neverAsk = () => false

    return {
        // Entries without an `ask` callback
        table1: { description: text("dataDiffTable1"), type: 'string', required: true },
        table2: { description: text("dataDiffTable2"), type: 'string', required: true },
        schema1: { description: text("dataDiffSchema1"), type: 'string', required: false },
        schema2: { description: text("dataDiffSchema2"), type: 'string', required: false },
        keyColumns: { description: text("dataDiffKeyColumns"), type: 'string', required: true },
        // Optional entries whose `ask` callback returns false
        compareColumns: { description: text("dataDiffCompareColumns"), type: 'string', required: false, ask: neverAsk },
        output: { description: text("dataDiffOutput"), type: 'string', required: false, ask: neverAsk },
        format: { description: text("dataDiffFormat"), type: 'string', required: false, ask: neverAsk },
        showValues: { description: text("dataDiffShowValues"), type: 'boolean', required: false, default: false, ask: neverAsk },
        limit: { description: text("dataDiffLimit"), type: 'number', required: false, default: 10000, ask: neverAsk },
        timeout: { description: text("dataDiffTimeout"), type: 'number', required: false, default: 3600, ask: neverAsk },
        // `ask` for profile intentionally returns undefined rather than false
        profile: { description: text("profile"), type: 'string', required: false, ask: () => {} },
        dryRun: { description: text("dryRun"), type: 'boolean', required: false, ask: neverAsk }
    }
}

// Frozen (top level only) so the exported prompt set cannot gain, lose or replace entries
export const inputPrompts = Object.freeze(buildInputPrompts())
/**
 * Command handler function.
 * Loads the base utilities on demand and passes the parsed arguments, the
 * command implementation (dataDiffMain) and the prompt definitions to the
 * shared prompt handler.
 * @param {object} argv - Command line arguments from yargs
 * @returns {Promise<void>}
 */
export async function handler(argv) {
    const base = await import('../utils/base.js')
    // Await the call so that, if promptHandler returns a promise, its rejection
    // propagates through the promise returned here instead of being left floating.
    // Awaiting a non-promise return value is harmless.
    await base.promptHandler(argv, dataDiffMain, inputPrompts)
}
/**
 * Show differences between two datasets.
 *
 * Connects to the database, resolves the schemas, determines the columns the two
 * tables have in common (optionally narrowed by `compareColumns`), runs the diff
 * and either writes the result to `prompts.output` or prints it to the console.
 * The timeout timer and the database connection are released on both the success
 * and the failure path.
 *
 * @param {object} prompts - Resolved command options (table1, table2, schema1, schema2,
 *   keyColumns, compareColumns, output, format, limit, showValues, timeout, ...)
 * @returns {Promise<void>}
 * @throws Re-throws any error raised during the run after logging it
 */
export async function dataDiffMain(prompts) {
    const base = await import('../utils/base.js')
    base.debug('dataDiffMain')

    // Node replaces timer delays above 2^31-1 ms with 1 ms, so cap the delay explicitly
    const MAX_TIMER_DELAY_MS = 2 ** 31 - 1
    // Splits a comma-separated value (or an array of values) into trimmed, non-empty names;
    // a missing value yields an empty list instead of a TypeError
    const parseList = (value) => String(value ?? '').split(',').map(c => c.trim()).filter(Boolean)

    let timeoutHandle = null
    let dbClient = null
    try {
        base.setPrompts(prompts)

        // Set operation timeout: terminate the process if the run takes too long
        if (prompts.timeout > 0) {
            timeoutHandle = setTimeout(() => {
                // Report why the process is exiting instead of terminating silently
                console.error(baseLite.bundle.getText("error.dataDiff", [`timed out after ${prompts.timeout} seconds`]))
                process.exit(1)
            }, Math.min(prompts.timeout * 1000, MAX_TIMER_DELAY_MS))
        }

        // Connect to database
        dbClient = await dbClientClass.getNewClient(prompts)
        await dbClient.connect()
        const dbKind = (dbClient.getKind() || 'hana').toLowerCase()

        // Get schemas if not provided; the sentinel default counts as "not provided"
        let schema1 = prompts.schema1
        let schema2 = prompts.schema2
        if (schema1 === '**CURRENT_SCHEMA**') {
            schema1 = null
        }
        if (schema2 === '**CURRENT_SCHEMA**') {
            schema2 = null
        }
        if (!schema1 && dbKind !== 'sqlite') {
            schema1 = await getCurrentSchema(dbClient, dbKind)
        }
        // An unspecified second schema follows the first one
        if (!schema2 && dbKind !== 'sqlite') {
            schema2 = schema1
        }

        const table1 = prompts.table1
        const table2 = prompts.table2
        const keyColumns = parseList(prompts.keyColumns)
        if (keyColumns.length === 0) {
            throw new Error(baseLite.bundle.getText("errNoKeyColumns"))
        }
        console.log(baseLite.bundle.getText("info.startingDataDiff", [table1, table2]))

        // Get columns
        const columns1 = await getTableColumns(dbClient, schema1, table1, dbKind)
        const columns2 = await getTableColumns(dbClient, schema2, table2, dbKind)

        // Determine columns to compare: those present in both tables, in table 1 order
        const inTable2 = new Set(columns2)
        let compareColumns = columns1.filter(c => inTable2.has(c))
        if (prompts.compareColumns) {
            const selected = new Set(parseList(prompts.compareColumns))
            compareColumns = compareColumns.filter(c => selected.has(c))
        }

        // Perform diff
        const diffs = await performDataDiff(
            dbClient,
            schema1,
            table1,
            schema2,
            table2,
            keyColumns,
            compareColumns,
            prompts.limit,
            dbKind
        )

        // Output results
        if (prompts.output) {
            await outputDiffResults(prompts.output, diffs, prompts.format)
        } else {
            displayDiffResults(diffs, prompts.format, prompts.showValues)
        }
        console.log(baseLite.bundle.getText("success.dataDiffComplete", [
            diffs.identical,
            diffs.different,
            diffs.onlyInTable1.length,
            diffs.onlyInTable2.length
        ]))

        // Regular disconnect: a failure here is still reported as an error.
        // Clear the shared reference first so the cleanup below does not disconnect twice.
        const connectedClient = dbClient
        dbClient = null
        await connectedClient.disconnect()
    } catch (error) {
        console.error(baseLite.bundle.getText("error.dataDiff", [error.message]))
        base.debug(error)
        throw error
    } finally {
        // Always stop the watchdog so a failed run cannot be killed (or kept alive) by it later
        if (timeoutHandle) clearTimeout(timeoutHandle)
        // Best-effort disconnect on the error path; the original error stays the one reported
        if (dbClient) {
            try {
                await dbClient.disconnect()
            } catch (disconnectError) {
                base.debug(disconnectError)
            }
        }
    }
}
/**
 * Get table columns from the database catalog.
 *
 * The table name is first looked up exactly as given, because the data queries
 * quote the name verbatim. Only when that finds nothing is the case-folded name
 * tried (upper case for HANA, lower case for PostgreSQL), which was the only
 * lookup performed before.
 * Database kinds other than 'hana' and 'postgres' yield an empty list.
 *
 * @param {object} dbClient - Database client exposing execSQL(query, params)
 * @param {string|null} schema - Schema name; falls back to 'PUBLIC' (HANA) or 'public' (PostgreSQL)
 * @param {string} table - Table name
 * @param {string} dbKind - Database kind
 * @returns {Promise<Array<string>>} Column names in catalog position order
 */
async function getTableColumns(dbClient, schema, table, dbKind) {
    let query
    let defaultSchema
    let foldCase
    let nameField
    if (dbKind === 'hana') {
        query = `SELECT COLUMN_NAME FROM SYS.TABLE_COLUMNS
WHERE SCHEMA_NAME = ? AND TABLE_NAME = ?
ORDER BY POSITION`
        defaultSchema = 'PUBLIC'
        foldCase = (name) => name.toUpperCase()
        nameField = 'COLUMN_NAME'
    } else if (dbKind === 'postgres') {
        query = `SELECT column_name FROM information_schema.columns
WHERE table_schema = ? AND table_name = ?
ORDER BY ordinal_position`
        defaultSchema = 'public'
        foldCase = (name) => name.toLowerCase()
        nameField = 'column_name'
    } else {
        return []
    }

    const schemaName = schema || defaultSchema
    const exactName = String(table)
    // Runs the catalog query for one candidate table name
    const lookup = async (name) => {
        const result = await dbClient.execSQL(query, [schemaName, name])
        return result.map(r => r[nameField])
    }

    const exactMatch = await lookup(exactName)
    if (exactMatch.length > 0) {
        return exactMatch
    }
    const foldedName = foldCase(exactName)
    // Skip the second round trip when folding does not change the name
    return foldedName === exactName ? exactMatch : lookup(foldedName)
}
/**
 * Perform data diff between two tables.
 *
 * Reads up to `limit` rows from each table, indexes them by the joined key column
 * values and classifies every key as identical, different, only in table 1 or
 * only in table 2.
 *
 * Behaviour worth knowing:
 * - Both queries are ordered by the key columns so that, when a table holds more
 *   rows than `limit`, the two row sets cover the same leading key range instead
 *   of two arbitrary subsets.
 * - Row keys are the key values joined with '|'. Values that contain '|' can
 *   collide, and rows sharing a key within one table overwrite each other.
 * - Values are compared through their JSON serialisation.
 *
 * @param {object} dbClient - Database client exposing execSQL(query)
 * @param {string|null} schema1 - Schema 1
 * @param {string} table1 - Table 1
 * @param {string|null} schema2 - Schema 2
 * @param {string} table2 - Table 2
 * @param {Array<string>} keyColumns - Key columns
 * @param {Array<string>} compareColumns - Columns to compare
 * @param {number} limit - Row limit (non-negative integer, numeric strings accepted)
 * @param {string} dbKind - Database kind (currently not used by this function)
 * @returns {Promise<object>} `{ identical, different, onlyInTable1, onlyInTable2, differences }`
 * @throws {TypeError} When `limit` is not a non-negative integer
 */
async function performDataDiff(dbClient, schema1, table1, schema2, table2, keyColumns, compareColumns, limit, dbKind) {
    const diffs = {
        identical: 0,
        different: 0,
        onlyInTable1: [],
        onlyInTable2: [],
        differences: []
    }
    try {
        // The limit is interpolated into the SQL text, so only a plain integer may pass
        const rowLimit = typeof limit === 'string' && limit.trim() !== '' ? Number(limit) : limit
        if (!Number.isSafeInteger(rowLimit) || rowLimit < 0) {
            throw new TypeError(`Invalid row limit: ${limit}`)
        }

        // Delimited identifier; embedded double quotes are doubled so a name cannot
        // terminate the quoting and inject SQL
        const quote = (name) => `"${String(name).replace(/"/g, '""')}"`

        // Get rows from both tables. Key columns usually appear in compareColumns too;
        // select every column only once.
        const keyList = keyColumns.map(k => quote(k)).join(',')
        const colList = [...new Set([...keyColumns, ...compareColumns])].map(c => quote(c)).join(',')
        const orderBy = keyList ? ` ORDER BY ${keyList}` : ''
        const table1Name = formatQualifiedName(schema1, table1)
        const table2Name = formatQualifiedName(schema2, table2)
        const query1 = `SELECT ${colList} FROM ${table1Name}${orderBy} LIMIT ${rowLimit}`
        const query2 = `SELECT ${colList} FROM ${table2Name}${orderBy} LIMIT ${rowLimit}`
        const rows1 = await dbClient.execSQL(query1)
        const rows2 = await dbClient.execSQL(query2)

        // Build maps from joined key value to row
        const toKey = (row) => keyColumns.map(k => row[k]).join('|')
        const map1 = new Map()
        for (const row of rows1) {
            map1.set(toKey(row), row)
        }
        const map2 = new Map()
        for (const row of rows2) {
            map2.set(toKey(row), row)
        }

        // Compare
        for (const [keyValue, row1] of map1) {
            if (!map2.has(keyValue)) {
                diffs.onlyInTable1.push({
                    key: keyValue,
                    row: row1
                })
                continue
            }
            const row2 = map2.get(keyValue)
            const changes = []
            for (const col of compareColumns) {
                if (JSON.stringify(row1[col]) !== JSON.stringify(row2[col])) {
                    changes.push({
                        column: col,
                        table1Value: row1[col],
                        table2Value: row2[col]
                    })
                }
            }
            if (changes.length > 0) {
                diffs.different++
                diffs.differences.push({
                    key: keyValue,
                    changes
                })
            } else {
                diffs.identical++
            }
            // Matched keys are removed so only table-2-exclusive rows remain afterwards
            map2.delete(keyValue)
        }

        // Remaining in map2
        for (const [keyValue, row] of map2) {
            diffs.onlyInTable2.push({
                key: keyValue,
                row: row
            })
        }
    } catch (error) {
        baseLite.debug(`Error performing data diff: ${error.message}`)
        throw error
    }
    return diffs
}
/**
 * Build the CSV representation of a diff result, one array entry per line.
 * Every line has the five fields of the header. Value fields are always quoted,
 * key and column fields only when they contain a quote, comma or line break;
 * embedded double quotes are doubled.
 * @param {object} diffs - Diff results
 * @returns {Array<string>} Header line followed by one line per change / exclusive row
 */
function buildDiffCsvLines(diffs) {
    const quoted = (value) => `"${String(value).replace(/"/g, '""')}"`
    const quotedIfNeeded = (value) => {
        const text = String(value)
        return /[",\r\n]/.test(text) ? quoted(text) : text
    }
    const lines = ['type,key,column,table1Value,table2Value']
    for (const diff of diffs.differences) {
        for (const c of diff.changes) {
            lines.push(`difference,${quotedIfNeeded(diff.key)},${quotedIfNeeded(c.column)},${quoted(c.table1Value)},${quoted(c.table2Value)}`)
        }
    }
    // Rows that exist in only one table have no column/value data: three empty trailing fields
    for (const item of diffs.onlyInTable1) {
        lines.push(`onlyInTable1,${quotedIfNeeded(item.key)},,,`)
    }
    for (const item of diffs.onlyInTable2) {
        lines.push(`onlyInTable2,${quotedIfNeeded(item.key)},,,`)
    }
    return lines
}
/**
 * Display diff results on the console.
 * 'summary' prints the four counters and, when `showValues` is set, the first five
 * differing keys with their changed values; 'csv' prints one CSV line per entry;
 * 'json' prints the whole result object. Any other format prints nothing.
 * @param {object} diffs - Diff results
 * @param {string} format - Display format
 * @param {boolean} showValues - Whether to show values
 * @returns {void}
 */
function displayDiffResults(diffs, format = 'summary', showValues = false) {
    if (format === 'summary') {
        console.log(`\n${baseLite.colors.green('Identical:')} ${diffs.identical}`)
        console.log(`${baseLite.colors.yellow('Different:')} ${diffs.different}`)
        console.log(`${baseLite.colors.red('Only in Table 1:')} ${diffs.onlyInTable1.length}`)
        console.log(`${baseLite.colors.cyan('Only in Table 2:')} ${diffs.onlyInTable2.length}`)
        if (diffs.different > 0 && showValues) {
            console.log(`\n${baseLite.colors.yellow('First 5 differences:')}`)
            for (const diff of diffs.differences.slice(0, 5)) {
                console.log(` Key: ${diff.key}`)
                for (const c of diff.changes) {
                    console.log(` ${c.column}: ${c.table1Value} -> ${c.table2Value}`)
                }
            }
        }
    } else if (format === 'csv') {
        for (const line of buildDiffCsvLines(diffs)) {
            console.log(line)
        }
    } else if (format === 'json') {
        console.log(JSON.stringify(diffs, null, 2))
    }
}
/**
 * Output diff results to file.
 * 'csv' writes the CSV representation; every other format (including 'summary',
 * which has no file representation here) writes the result object as JSON.
 * @param {string} filePath - Output file path
 * @param {object} diffs - Diff results
 * @param {string} format - Output format
 * @returns {Promise<void>}
 */
async function outputDiffResults(filePath, diffs, format = 'json') {
    const fs = await import('fs')
    const content = format === 'csv'
        ? `${buildDiffCsvLines(diffs).join('\n')}\n`
        : JSON.stringify(diffs, null, 2)
    await fs.promises.writeFile(filePath, content, 'utf8')
}
/**
 * Get current schema.
 * Asks the database for the session's current schema. Only 'hana' and 'postgres'
 * are queried; any other kind, an empty result or a failing query yields null.
 * Query failures are written to the debug log rather than thrown.
 * @param {object} dbClient - Database client
 * @param {string} dbKind - Database kind
 * @returns {Promise<string|null>}
 */
async function getCurrentSchema(dbClient, dbKind) {
    // Statement to run and the result field that carries the schema name
    let statement
    let schemaField
    switch (dbKind) {
        case 'hana':
            statement = "SELECT CURRENT_SCHEMA FROM DUMMY"
            schemaField = 'CURRENT_SCHEMA'
            break
        case 'postgres':
            statement = "SELECT current_schema()"
            schemaField = 'current_schema'
            break
        default:
            return null
    }

    try {
        const rows = await dbClient.execSQL(statement)
        const firstRow = rows?.[0]
        return firstRow?.[schemaField] || null
    } catch (failure) {
        baseLite.debug(`Error getting current schema: ${failure.message}`)
        return null
    }
}
/**
 * Format qualified table name (schema.table).
 * Both parts are emitted as double-quoted (delimited) identifiers. Double quotes
 * inside a name are doubled so the name cannot end the quoting early and alter
 * the surrounding SQL statement.
 * @param {string|null} schema - Schema name; when empty or null only the table is returned
 * @param {string} table - Table name
 * @returns {string} `"schema"."table"` or `"table"`
 */
function formatQualifiedName(schema, table) {
    const quote = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`
    return schema ? `${quote(schema)}.${quote(table)}` : quote(table)
}