hana-cli
Version:
HANA Developer Command Line Interface
633 lines (578 loc) • 18.2 kB
JavaScript
// @ts-check
import * as baseLite from '../utils/base-lite.js'
import dbClientClass from "../utils/database/index.js"
import { buildDocEpilogue } from '../utils/doc-linker.js'
// Command name registered with yargs for this module
export const command = 'dataLineage'

// Additional names that resolve to the same command
export const aliases = [
    'lineage',
    'dataFlow',
    'traceLineage'
]

// Command description, looked up in the shared text bundle
export const describe = baseLite.bundle.getText('dataLineage')
/**
 * Option definitions for the dataLineage command.
 * The table is handed to baseLite.getBuilder() by `builder` and passed to
 * base.promptHandler() by `handler`. All descriptions come from the text bundle.
 */
const dataLineageOptions = {
    // Table whose lineage is traced
    table: {
        alias: ['t'],
        type: 'string',
        desc: baseLite.bundle.getText('dataLineageTable')
    },

    // The default is a sentinel; dataLineageMain replaces it with the schema
    // reported by the database connection
    schema: {
        alias: ['s'],
        type: 'string',
        default: '**CURRENT_SCHEMA**',
        desc: baseLite.bundle.getText('dataLineageSchema')
    },

    // Which way the dependency graph is walked
    direction: {
        alias: ['dir'],
        choices: ['upstream', 'downstream', 'bidirectional'],
        default: 'upstream',
        type: 'string',
        desc: baseLite.bundle.getText('dataLineageDirection')
    },

    // Maximum number of levels followed from the root table
    depth: {
        alias: ['dp'],
        type: 'number',
        default: 5,
        desc: baseLite.bundle.getText('dataLineageDepth')
    },

    // When true, transformations are collected while walking upstream
    includeTransformations: {
        alias: ['it'],
        type: 'boolean',
        default: true,
        desc: baseLite.bundle.getText('dataLineageIncludeTransformations')
    },

    // Optional file path; when set the result is written there instead of printed
    output: {
        alias: ['o'],
        type: 'string',
        desc: baseLite.bundle.getText('dataLineageOutput')
    },

    // Rendering of the result
    format: {
        alias: ['f'],
        choices: ['json', 'csv', 'graphml', 'summary'],
        default: 'summary',
        type: 'string',
        desc: baseLite.bundle.getText('dataLineageFormat')
    },

    // Seconds before dataLineageMain aborts the process (multiplied by 1000 for setTimeout)
    timeout: {
        alias: ['to'],
        type: 'number',
        default: 3600,
        desc: baseLite.bundle.getText('dataLineageTimeout')
    },

    // Passed along with the other prompts to dbClientClass.getNewClient()
    profile: {
        alias: ['p'],
        type: 'string',
        desc: baseLite.bundle.getText('profile')
    }
}
/**
 * Configure yargs for the dataLineage command: register the options,
 * add a usage example, set the wrap width and attach the documentation epilogue.
 * @param {object} yargs - yargs instance supplied by the CLI framework
 * @returns {object} whatever the final `epilog` call returns
 */
export const builder = (yargs) => {
    const withOptions = yargs.options(baseLite.getBuilder(dataLineageOptions))
    const withExample = withOptions.example(
        'hana-cli dataLineage --table myTable --depth 3',
        baseLite.bundle.getText("dataLineageExample")
    )
    const wrapped = withExample.wrap(160)
    return wrapped.epilog(
        buildDocEpilogue('dataLineage', 'data-tools', ['dataProfile', 'compareData'])
    )
}
/**
 * Prompt schema handed to base.promptHandler() by `handler`.
 * Only `table` is required. Most optional entries carry `ask: () => false`,
 * which presumably suppresses interactive prompting for them - confirm against
 * promptHandler. Declared types mirror the yargs option types in dataLineageOptions.
 */
export let inputPrompts = {
    table: {
        description: baseLite.bundle.getText("dataLineageTable"),
        type: 'string',
        required: true
    },
    schema: {
        description: baseLite.bundle.getText("dataLineageSchema"),
        type: 'string',
        required: false
    },
    direction: {
        description: baseLite.bundle.getText("dataLineageDirection"),
        type: 'string',
        required: false,
        ask: () => false
    },
    depth: {
        description: baseLite.bundle.getText("dataLineageDepth"),
        // The matching CLI option is numeric (default 5), so the prompt type must be
        // 'number' as well, in line with the `timeout` entry below.
        type: 'number',
        required: false,
        ask: () => false
    },
    includeTransformations: {
        description: baseLite.bundle.getText("dataLineageIncludeTransformations"),
        type: 'boolean',
        required: false,
        default: true,
        ask: () => false
    },
    output: {
        description: baseLite.bundle.getText("dataLineageOutput"),
        type: 'string',
        required: false,
        ask: () => false
    },
    format: {
        description: baseLite.bundle.getText("dataLineageFormat"),
        type: 'string',
        required: false,
        ask: () => false
    },
    timeout: {
        description: baseLite.bundle.getText("dataLineageTimeout"),
        type: 'number',
        required: false,
        default: 3600,
        ask: () => false
    },
    profile: {
        description: baseLite.bundle.getText("profile"),
        type: 'string',
        required: false,
        // Returns undefined rather than false, unlike the entries above
        ask: () => { }
    }
}
/**
 * yargs entry point for the dataLineage command.
 * Loads the full base utilities on demand and delegates to promptHandler,
 * which receives dataLineageMain as the function to run. The promptHandler
 * call is not awaited here.
 * @param {object} argv - Command line arguments from yargs
 * @returns {Promise<void>}
 */
export async function handler(argv) {
    const baseModule = await import('../utils/base.js')
    baseModule.promptHandler(
        argv,
        dataLineageMain,
        inputPrompts,
        true,
        true,
        dataLineageOptions
    )
}
/**
 * Trace data lineage and transformations for one table and print or save the result.
 *
 * Steps: arm the optional timeout, connect, resolve the schema, run traceLineage,
 * then either write the result to `prompts.output` or print it.
 * The timer and the database connection are released on every exit path.
 * @param {object} prompts - User prompts (table, schema, direction, depth,
 *   includeTransformations, output, format, timeout, profile)
 * @returns {Promise<void>}
 * @throws Re-throws any error after logging it
 */
export async function dataLineageMain(prompts) {
    const base = await import('../utils/base.js')
    base.debug('dataLineageMain')
    // Declared outside the try block so the finally block can release them
    let timeoutHandle = null
    let dbClient = null
    try {
        base.setPrompts(prompts)
        // Set operation timeout (prompts.timeout is in seconds); expiry ends the process
        timeoutHandle = prompts.timeout > 0
            ? setTimeout(() => process.exit(1), prompts.timeout * 1000)
            : null
        // Connect to database
        dbClient = await dbClientClass.getNewClient(prompts)
        await dbClient.connect()
        const dbKind = (dbClient.getKind() || 'hana').toLowerCase()
        // The option default is a sentinel meaning "use the connection's current schema"
        let schema = prompts.schema
        if (schema === '**CURRENT_SCHEMA**') {
            schema = null
        }
        if (!schema && dbKind !== 'sqlite') {
            schema = await getCurrentSchema(dbClient, dbKind)
        }
        const table = prompts.table
        const depth = prompts.depth || 5
        const direction = prompts.direction || 'upstream'
        const includeTransformations = typeof prompts.includeTransformations === 'boolean'
            ? prompts.includeTransformations
            : true
        console.log(baseLite.bundle.getText("info.startingLineageTrace", [table, direction]))
        // Get lineage information
        const lineage = await traceLineage(
            dbClient,
            schema,
            table,
            direction,
            depth,
            includeTransformations,
            dbKind
        )
        // Output results
        if (prompts.output) {
            await outputResults(prompts.output, lineage, prompts.format)
        } else {
            displayResults(lineage, prompts.format)
        }
        console.log(baseLite.bundle.getText("success.lineageTraceComplete", [
            lineage.sourceCount,
            lineage.transformationCount,
            lineage.targetCount
        ]))
        // Success path: a failing disconnect is reported like any other error.
        // The reference is cleared first so the finally block does not retry it.
        const connectedClient = dbClient
        dbClient = null
        await connectedClient.disconnect()
    } catch (error) {
        console.error(baseLite.bundle.getText("error.dataLineage", [error.message]))
        base.debug(error)
        throw error
    } finally {
        // Always disarm the timer, otherwise a failed run would still hit process.exit(1) later
        if (timeoutHandle) clearTimeout(timeoutHandle)
        // Failure path: best-effort disconnect that must not mask the original error
        if (dbClient) {
            try {
                await dbClient.disconnect()
            } catch (disconnectError) {
                base.debug(disconnectError)
            }
        }
    }
}
/**
 * Ask the database which schema the connection currently uses.
 * HANA and Postgres are queried; every other kind gets 'public' without a query.
 * An empty or missing value falls back to 'PUBLIC' (HANA) or 'public' (Postgres).
 * @param {object} dbClient - Database client exposing execSQL(sql)
 * @param {string} dbKind - Database kind
 * @returns {Promise<string>}
 */
async function getCurrentSchema(dbClient, dbKind) {
    switch (dbKind) {
        case 'hana': {
            const rows = await dbClient.execSQL('SELECT CURRENT_SCHEMA FROM DUMMY')
            const current = rows[0]?.CURRENT_SCHEMA
            return current || 'PUBLIC'
        }
        case 'postgres': {
            const rows = await dbClient.execSQL('SELECT current_schema()')
            const current = rows[0]?.current_schema
            return current || 'public'
        }
        default:
            return 'public'
    }
}
/**
 * Trace data lineage breadth-first, starting at the given table.
 *
 * Every dequeued node is expanded with getSourceTables / getTargetTables
 * (depending on `direction`) until `depth` levels have been followed or the
 * graph holds 100 nodes. A neighbour that was already visited is skipped
 * entirely: no node, no edge and no counter increment is recorded for it.
 * Errors while expanding a node are debug-logged and the walk continues.
 * @param {object} dbClient - Database client
 * @param {string|null} schema - Schema name of the root table
 * @param {string} table - Root table name
 * @param {string} direction - Lineage direction (upstream, downstream, bidirectional)
 * @param {number} depth - Maximum number of levels to follow from the root
 * @param {boolean} includeTransformations - Collect transformations while walking upstream
 * @param {string} dbKind - Database kind
 * @returns {Promise<object>} Lineage with nodes, edges, transformations and counters
 */
async function traceLineage(dbClient, schema, table, direction, depth, includeTransformations, dbKind) {
    const lineage = {
        rootTable: table,
        direction: direction,
        depth: depth,
        sourceCount: 0,
        targetCount: 0,
        transformationCount: 0,
        nodes: [],
        edges: [],
        transformations: []
    }
    const visited = new Set()
    const queue = []

    // Without a schema (e.g. sqlite) the id is just the name rather than "null.<name>"
    const buildNodeId = (nodeSchema, nodeName) => nodeSchema ? `${nodeSchema}.${nodeName}` : nodeName

    // Registers a newly discovered table; returns null when it was seen before
    const enqueueIfNew = (found, level) => {
        const id = buildNodeId(found.schema, found.name)
        if (visited.has(id)) {
            return null
        }
        const node = {
            id,
            name: found.name,
            schema: found.schema,
            type: 'table',
            level
        }
        lineage.nodes.push(node)
        visited.add(id)
        queue.push({ node, level })
        return node
    }

    // Add root node
    const rootNode = {
        id: buildNodeId(schema, table),
        name: table,
        schema: schema,
        type: 'table',
        level: 0
    }
    lineage.nodes.push(rootNode)
    visited.add(rootNode.id)
    queue.push({ node: rootNode, level: 0 })

    const followUpstream = direction === 'upstream' || direction === 'bidirectional'
    const followDownstream = direction === 'downstream' || direction === 'bidirectional'

    while (queue.length > 0 && lineage.nodes.length < 100) {
        const current = queue.shift()
        if (current.level >= depth) continue
        // Expand the node that was dequeued. Querying the root on every pass would
        // never get beyond level 1 and would re-add the root's transformations each time.
        const { id: currentId, schema: currentSchema, name: currentName } = current.node
        const nextLevel = current.level + 1
        try {
            if (followUpstream) {
                // Find source tables
                const sources = await getSourceTables(dbClient, currentSchema, currentName, dbKind)
                for (const source of sources) {
                    const sourceNode = enqueueIfNew(source, nextLevel)
                    if (!sourceNode) continue
                    lineage.edges.push({
                        source: sourceNode.id,
                        target: currentId,
                        type: 'data_flow',
                        label: source.joinType || 'join'
                    })
                    lineage.sourceCount++
                }
                // Find views/transformations
                if (includeTransformations) {
                    const transforms = await getTransformations(dbClient, currentSchema, currentName, dbKind)
                    for (const transform of transforms) {
                        lineage.transformations.push({
                            source: currentName,
                            transformation: transform.name,
                            type: transform.type,
                            definition: transform.definition
                        })
                        lineage.transformationCount++
                    }
                }
            }
            if (followDownstream) {
                // Find target tables
                const targets = await getTargetTables(dbClient, currentSchema, currentName, dbKind)
                for (const target of targets) {
                    const targetNode = enqueueIfNew(target, nextLevel)
                    if (!targetNode) continue
                    lineage.edges.push({
                        source: currentId,
                        target: targetNode.id,
                        type: 'data_flow',
                        label: 'depends_on'
                    })
                    lineage.targetCount++
                }
            }
        } catch (err) {
            baseLite.debug(`Error tracing lineage: ${err.message}`)
        }
    }
    return lineage
}
/**
 * Look up the tables reported as sources of the given table.
 *
 * HANA: reads SYS.OBJECT_DEPENDENCIES filtered by BASE_* = (schema, TABLE) and
 * returns the DEPENDENT_* columns; the table name is upper-cased and a missing
 * schema becomes 'PUBLIC'.
 * Postgres: lists up to 50 tables of the schema (default 'public'); the query
 * does not reference `table`.
 * Any other database kind, or any query error (debug-logged), yields [].
 *
 * NOTE(review): confirm against the SYS.OBJECT_DEPENDENCIES documentation that
 * filtering on BASE_* and returning DEPENDENT_* gives upstream objects rather
 * than dependents. Also confirm the `?` placeholders are accepted by the
 * Postgres client.
 * @param {object} dbClient - Database client exposing execSQL(sql, params)
 * @param {string|null} schema - Schema name
 * @param {string} table - Table name
 * @param {string} dbKind - Database kind
 * @returns {Promise<Array<object>>} Items shaped { name, schema, joinType: 'reference' }
 */
async function getSourceTables(dbClient, schema, table, dbKind) {
    const asReference = (name, schemaName) => ({
        name,
        schema: schemaName,
        joinType: 'reference'
    })
    try {
        if (dbKind === 'hana') {
            const sql = [
                '',
                'SELECT DISTINCT',
                'DEPENDENT_SCHEMA_NAME AS schema_name,',
                'DEPENDENT_OBJECT_NAME AS table_name',
                'FROM SYS.OBJECT_DEPENDENCIES',
                'WHERE BASE_SCHEMA_NAME = ?',
                'AND BASE_OBJECT_NAME = ?',
                'AND BASE_OBJECT_NAME != DEPENDENT_OBJECT_NAME',
                'LIMIT 50',
                ''
            ].join('\n')
            const rows = await dbClient.execSQL(sql, [schema || 'PUBLIC', table.toUpperCase()])
            // Result keys are accepted in either case
            return rows.map((row) => asReference(
                row.table_name || row.TABLE_NAME,
                row.schema_name || row.SCHEMA_NAME
            ))
        }
        if (dbKind === 'postgres') {
            const sql = [
                '',
                'SELECT DISTINCT',
                't.table_schema,',
                't.table_name',
                'FROM information_schema.tables t',
                'WHERE t.table_schema = ?',
                'LIMIT 50',
                ''
            ].join('\n')
            const rows = await dbClient.execSQL(sql, [schema || 'public'])
            return rows.map((row) => asReference(row.table_name, row.table_schema))
        }
    } catch (err) {
        baseLite.debug(`Error getting source tables: ${err.message}`)
    }
    return []
}
/**
 * Look up the tables reported as targets of the given table.
 *
 * HANA: reads SYS.OBJECT_DEPENDENCIES filtered by DEPENDENT_* = (schema, TABLE)
 * and returns the BASE_* columns; the table name is upper-cased and a missing
 * schema becomes 'PUBLIC'.
 * Postgres: lists up to 50 tables of the schema (default 'public'); the query
 * does not reference `table`.
 * Any other database kind, or any query error (debug-logged), yields [].
 *
 * NOTE(review): confirm against the SYS.OBJECT_DEPENDENCIES documentation that
 * filtering on DEPENDENT_* and returning BASE_* gives downstream objects rather
 * than the objects this table depends on.
 * @param {object} dbClient - Database client exposing execSQL(sql, params)
 * @param {string|null} schema - Schema name
 * @param {string} table - Table name
 * @param {string} dbKind - Database kind
 * @returns {Promise<Array<object>>} Items shaped { name, schema, joinType: 'reference' }
 */
async function getTargetTables(dbClient, schema, table, dbKind) {
    try {
        switch (dbKind) {
            case 'hana': {
                const statement = [
                    '',
                    'SELECT DISTINCT',
                    'BASE_SCHEMA_NAME AS schema_name,',
                    'BASE_OBJECT_NAME AS table_name',
                    'FROM SYS.OBJECT_DEPENDENCIES',
                    'WHERE DEPENDENT_SCHEMA_NAME = ?',
                    'AND DEPENDENT_OBJECT_NAME = ?',
                    'AND BASE_OBJECT_NAME != DEPENDENT_OBJECT_NAME',
                    'LIMIT 50',
                    ''
                ].join('\n')
                const bindings = [schema || 'PUBLIC', table.toUpperCase()]
                const records = await dbClient.execSQL(statement, bindings)
                // Result keys are accepted in either case
                return records.map((record) => {
                    const name = record.table_name || record.TABLE_NAME
                    const owner = record.schema_name || record.SCHEMA_NAME
                    return { name, schema: owner, joinType: 'reference' }
                })
            }
            case 'postgres': {
                const statement = [
                    '',
                    'SELECT DISTINCT',
                    't.table_schema,',
                    't.table_name',
                    'FROM information_schema.tables t',
                    'WHERE t.table_schema = ?',
                    'LIMIT 50',
                    ''
                ].join('\n')
                const records = await dbClient.execSQL(statement, [schema || 'public'])
                return records.map((record) => ({
                    name: record.table_name,
                    schema: record.table_schema,
                    joinType: 'reference'
                }))
            }
            default:
                break
        }
    } catch (err) {
        baseLite.debug(`Error getting target tables: ${err.message}`)
    }
    return []
}
/**
 * Get transformations that mention the given table.
 *
 * Only HANA is queried: views of the schema (default 'PUBLIC') whose definition
 * text contains the upper-cased table name are returned, at most 50. This is a
 * plain text match, so a view is reported whenever the name appears anywhere
 * in its definition. Other database kinds, and any query error (debug-logged),
 * yield an empty list.
 *
 * NOTE(review): confirm that SYS.VIEWS exposes a VIEW_DEFINITION column; if the
 * query is rejected the failure is only visible in the debug log.
 * @param {object} dbClient - Database client exposing execSQL(sql, params)
 * @param {string|null} schema - Schema name
 * @param {string} table - Table name
 * @param {string} dbKind - Database kind
 * @returns {Promise<Array<object>>} Items shaped { name, type, definition }
 */
async function getTransformations(dbClient, schema, table, dbKind) {
    if (dbKind !== 'hana') {
        return []
    }
    try {
        // Get views that reference this table
        const query = `
            SELECT
                VIEW_NAME,
                VIEW_DEFINITION,
                'VIEW' AS type
            FROM SYS.VIEWS
            WHERE SCHEMA_NAME = ?
            AND VIEW_DEFINITION LIKE ?
            LIMIT 50
        `
        const result = await dbClient.execSQL(query, [
            schema || 'PUBLIC',
            `%${table.toUpperCase()}%`
        ])
        return result.map(r => ({
            name: r.VIEW_NAME,
            // The alias is unquoted, so accept the key in either case, like the
            // source/target lookups do for their aliased columns
            type: r.type || r.TYPE,
            definition: r.VIEW_DEFINITION
        }))
    } catch (err) {
        baseLite.debug(`Error getting transformations: ${err.message}`)
    }
    return []
}
/**
 * Format a double-quoted, optionally schema-qualified table name.
 * Double quotes inside a name are doubled so the result stays a single
 * well-formed quoted identifier.
 * @param {string|null} schema - Schema name; omitted from the result when empty or null
 * @param {string} table - Table name
 * @returns {string} `"schema"."table"` or `"table"`
 */
function formatQualifiedName(schema, table) {
    const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`
    if (schema) {
        return `${quoteIdentifier(schema)}.${quoteIdentifier(table)}`
    }
    return quoteIdentifier(table)
}
/**
 * Write the lineage to a file in the requested format.
 * 'json' serialises the whole lineage object, 'csv' writes one row per edge,
 * 'graphml' uses generateGraphML, and anything else writes the summary report.
 * CSV fields are always quoted and embedded double quotes are doubled.
 * @param {string} filePath - Output file path
 * @param {object} lineage - Lineage data
 * @param {string} format - Output format
 * @returns {Promise<void>}
 */
async function outputResults(filePath, lineage, format) {
    const { promises: fs } = await import('fs')
    // A bare quote inside a quoted field would end the field early, so double it
    const csvField = (value) => `"${String(value).replace(/"/g, '""')}"`
    let content
    if (format === 'json') {
        content = JSON.stringify(lineage, null, 2)
    } else if (format === 'csv') {
        const rows = lineage.edges.map((edge) =>
            [edge.source, edge.target, edge.type, edge.label].map(csvField).join(',')
        )
        // Header plus one line per edge, each terminated by a newline
        content = ['Source,Target,Type,Label', ...rows].map((line) => `${line}\n`).join('')
    } else if (format === 'graphml') {
        content = generateGraphML(lineage)
    } else {
        content = formatSummaryReport(lineage)
    }
    await fs.writeFile(filePath, content)
}
/**
 * Generate GraphML for the lineage: one <node> per lineage node and one
 * <edge> per lineage edge inside a single directed graph.
 * Attribute values are XML-escaped so names containing &, <, > or " still
 * produce a well-formed document.
 * @param {object} lineage - Lineage data with `nodes` ({ id, name }) and
 *   `edges` ({ source, target, label })
 * @returns {string} GraphML document ending with a newline
 */
function generateGraphML(lineage) {
    // '&' must be replaced first, otherwise the entities added below get re-escaped
    const xmlAttr = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')

    const lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
        '<graph edgedefault="directed">'
    ]
    // Add nodes
    for (const node of lineage.nodes) {
        lines.push(` <node id="${xmlAttr(node.id)}" label="${xmlAttr(node.name)}"/>`)
    }
    // Add edges
    for (const edge of lineage.edges) {
        lines.push(` <edge source="${xmlAttr(edge.source)}" target="${xmlAttr(edge.target)}" label="${xmlAttr(edge.label)}"/>`)
    }
    lines.push('</graph>', '</graphml>')
    return lines.map((line) => `${line}\n`).join('')
}
/**
 * Build the human-readable summary of a lineage result.
 * The report lists the root table, direction, depth and the three counters,
 * followed by up to 20 nodes (indented by level) and up to 10 transformations;
 * anything beyond those limits is summarised in a "... and N more" line.
 * Styling is applied through baseLite.colors.
 * @param {object} lineage - Lineage data
 * @returns {string} Report text; every line ends with a newline
 */
function formatSummaryReport(lineage) {
    const palette = baseLite.colors
    const nodeLimit = 20
    const transformationLimit = 10
    const lines = []

    // Title block
    lines.push(palette.bold('Data Lineage Report'))
    lines.push(palette.bold('==================='))
    lines.push('')

    // Request parameters
    lines.push(`${palette.cyan('Root Table:')} ${lineage.rootTable}`)
    lines.push(`${palette.cyan('Direction:')} ${lineage.direction}`)
    lines.push(`${palette.cyan('Depth:')} ${lineage.depth}`)
    lines.push('')

    // Counters
    lines.push(`${palette.green('Source Tables:')} ${lineage.sourceCount}`)
    lines.push(`${palette.green('Target Tables:')} ${lineage.targetCount}`)
    lines.push(`${palette.green('Transformations:')} ${lineage.transformationCount}`)
    lines.push('')

    const nodeTotal = lineage.nodes.length
    if (nodeTotal > 0) {
        lines.push(palette.bold('Nodes:'))
        lineage.nodes.slice(0, nodeLimit).forEach((node) => {
            // A non-numeric or negative level is treated as no indentation
            const indentWidth = Math.max(0, Number(node.level) || 0)
            // Fall back to the id when schema or name is missing
            const label = node.schema && node.name
                ? `${node.schema}.${node.name}`
                : node.id
            lines.push(`${' '.repeat(indentWidth)}- ${label} ${palette.gray(`(Level ${node.level})`)}`)
        })
        if (nodeTotal > nodeLimit) {
            lines.push(` ${palette.gray(`... and ${nodeTotal - nodeLimit} more nodes`)}`)
        }
    }

    const transformationTotal = lineage.transformations.length
    if (transformationTotal > 0) {
        lines.push('')
        lines.push(palette.bold('Transformations:'))
        lineage.transformations.slice(0, transformationLimit).forEach((entry) => {
            lines.push(` - ${entry.type}: ${entry.transformation}`)
        })
        if (transformationTotal > transformationLimit) {
            lines.push(` ${palette.gray(`... and ${transformationTotal - transformationLimit} more transformations`)}`)
        }
    }

    return lines.map((line) => `${line}\n`).join('')
}
/**
 * Print the lineage to the console in the requested format.
 * 'json' prints the whole lineage object, 'csv' prints a header and one row
 * per edge, 'graphml' prints the generateGraphML output, and anything else
 * prints the summary report.
 * CSV fields are always quoted and embedded double quotes are doubled.
 * @param {object} lineage - Lineage data
 * @param {string} format - Display format
 * @returns {void}
 */
function displayResults(lineage, format) {
    // A bare quote inside a quoted field would end the field early, so double it
    const csvField = (value) => `"${String(value).replace(/"/g, '""')}"`
    switch (format) {
        case 'json':
            console.log(JSON.stringify(lineage, null, 2))
            break
        case 'csv':
            console.log('Source,Target,Type,Label')
            for (const edge of lineage.edges) {
                console.log([edge.source, edge.target, edge.type, edge.label].map(csvField).join(','))
            }
            break
        case 'graphml':
            console.log(generateGraphML(lineage))
            break
        default:
            console.log(formatSummaryReport(lineage))
    }
}