UNPKG

hana-cli

Version:
1,373 lines (1,249 loc) 44.8 kB
// @ts-check import * as baseLite from '../utils/base-lite.js' import dbClientClass from "../utils/database/index.js" import ExcelJS from 'exceljs' import { parse } from 'csv-parse' import { buildDocEpilogue } from '../utils/doc-linker.js' export const command = 'import' export const aliases = ['imp', 'uploadData', 'uploaddata'] export const describe = baseLite.bundle.getText("import") export const builder = (yargs) => yargs.options(baseLite.getBuilder({ filename: { alias: ['n'], type: 'string', desc: baseLite.bundle.getText("importFilename") }, table: { alias: ['t'], type: 'string', desc: baseLite.bundle.getText("importTable") }, schema: { alias: ['s'], type: 'string', default: '**CURRENT_SCHEMA**', desc: baseLite.bundle.getText("importSchema") }, output: { alias: ['o'], choices: ["csv", "excel"], default: "csv", type: 'string', desc: baseLite.bundle.getText("importOutputFormat") }, matchMode: { alias: ['m'], choices: ["order", "name", "auto"], default: "auto", type: 'string', desc: baseLite.bundle.getText("importMatchMode") }, truncate: { alias: ['tr'], type: 'boolean', default: false, desc: baseLite.bundle.getText("importTruncate") }, batchSize: { alias: ['b'], type: 'number', default: 1000, desc: baseLite.bundle.getText("importBatchSize") }, worksheet: { alias: ['w'], type: 'number', default: 1, desc: baseLite.bundle.getText("importWorksheet") }, startRow: { alias: ['sr'], type: 'number', default: 1, desc: baseLite.bundle.getText("importStartRow") }, skipEmptyRows: { alias: ['se'], type: 'boolean', default: true, desc: baseLite.bundle.getText("importSkipEmptyRows") }, excelCacheMode: { alias: ['ec'], choices: ["cache", "emit", "ignore"], default: "cache", type: 'string', desc: baseLite.bundle.getText("importExcelCacheMode") }, dryRun: { alias: ['dr'], type: 'boolean', default: false, desc: 'Preview import results without committing to database' }, maxFileSizeMB: { alias: ['mfs'], type: 'number', default: 500, desc: 'Maximum file size in MB (prevents memory exhaustion)' }, timeoutSeconds: { alias: ['ts'], type: 'number', default: 3600, desc: 'Import operation timeout in seconds (0 = no timeout)' }, nullValues: { alias: ['nv'], type: 'string', default: 'null,NULL,#N/A,', desc: 'Comma-separated list of values to treat as NULL' }, skipWithErrors: { alias: ['swe'], type: 'boolean', default: false, desc: 'Continue import even if errors exceed threshold (logs errors)' }, maxErrorsAllowed: { alias: ['mea'], type: 'number', default: -1, desc: 'Maximum errors allowed before stopping (-1 = unlimited)' }, profile: { alias: ['p'], type: 'string', desc: baseLite.bundle.getText("profile") } })).wrap(160).example('hana-cli import --filename data.csv --table myTable', baseLite.bundle.getText("importExample")).wrap(160).epilog(buildDocEpilogue('import', 'data-tools', ['export', 'dataValidator'])) export let inputPrompts = { filename: { description: baseLite.bundle.getText("importFilename"), type: 'string', required: true }, table: { description: baseLite.bundle.getText("importTable"), type: 'string', required: true }, schema: { description: baseLite.bundle.getText("importSchema"), type: 'string', required: false }, output: { description: baseLite.bundle.getText("importOutputFormat"), type: 'string', required: true }, matchMode: { description: baseLite.bundle.getText("importMatchMode"), type: 'string', required: true }, truncate: { description: baseLite.bundle.getText("importTruncate"), type: 'boolean', required: false, ask: () => { return false } }, batchSize: { description: baseLite.bundle.getText("importBatchSize"), type: 'number', required: false, ask: () => { return false } }, worksheet: { description: baseLite.bundle.getText("importWorksheet"), type: 'number', required: false, ask: () => { return false } }, startRow: { description: baseLite.bundle.getText("importStartRow"), type: 'number', required: false, ask: () => { return false } }, skipEmptyRows: { description: baseLite.bundle.getText("importSkipEmptyRows"), type: 'boolean', required: false, ask: () => { return false } }, excelCacheMode: { description: baseLite.bundle.getText("importExcelCacheMode"), type: 'string', required: false, ask: () => { return false } }, dryRun: { description: 'Preview import results without committing to database', type: 'boolean', required: false, ask: () => false }, maxFileSizeMB: { description: 'Maximum file size in MB (prevents memory exhaustion)', type: 'number', required: false, ask: () => false }, timeoutSeconds: { description: 'Import operation timeout in seconds (0 = no timeout)', type: 'number', required: false, ask: () => false }, nullValues: { description: 'Comma-separated list of values to treat as NULL', type: 'string', required: false, ask: () => false }, skipWithErrors: { description: 'Continue import even if errors exceed threshold (logs errors)', type: 'boolean', required: false, ask: () => false }, maxErrorsAllowed: { description: 'Maximum errors allowed before stopping (-1 = unlimited)', type: 'number', required: false, ask: () => false }, profile: { description: baseLite.bundle.getText("profile"), type: 'string', required: false, ask: () => { } } } /** * Command handler function * @param {object} argv - Command line arguments from yargs * @returns {Promise<void>} */ export async function handler(argv) { const base = await import('../utils/base.js') base.promptHandler(argv, importData, inputPrompts) } const DEFAULT_BATCH_SIZE = 1000 const MAX_ERROR_DETAILS = 100 const MAX_FILE_SIZE_MB = 500 // 500 MB default const DEFAULT_TIMEOUT_SECONDS = 3600 // 1 hour const DEFAULT_NULL_VALUES = ['null', 'NULL', '#N/A', ''] /** * Parse custom NULL values from configuration * @param {string} nullValuesStr - Comma-separated null value definitions * @returns {Set<string>} Set of values to treat as NULL */ function parseNullValues(nullValuesStr) { if (!nullValuesStr || typeof nullValuesStr !== 'string') { return new Set(DEFAULT_NULL_VALUES) } const values = nullValuesStr.split(',').map(v => v.trim()) return new Set(values) } /** * Calculate optimal batch size based on available memory and row size estimate * @param {number} requestedBatchSize - User-requested batch size * @param {number} estimatedRowSizeBytes - Estimated size of one row * @returns {number} Adjusted batch size */ function calcOptimalBatchSize(requestedBatchSize, estimatedRowSizeBytes = 1000) { const memStats = process.memoryUsage() const heapLimit = memStats.heapTotal const maxMemoryPerBatch = heapLimit * 0.3 // Use max 30% of heap for batch const maxBatchByMemory = Math.floor(maxMemoryPerBatch / estimatedRowSizeBytes) const safeBatchSize = Math.max(1, Math.min(requestedBatchSize, maxBatchByMemory)) return safeBatchSize } /** * Format bytes to human-readable format * @param {number} bytes * @returns {string} */ function formatBytes(bytes) { if (bytes === 0) return '0 Bytes' const k = 1024 const sizes = ['Bytes', 'KB', 'MB', 'GB'] const i = Math.floor(Math.log(bytes) / Math.log(k)) return Math.round(bytes / Math.pow(k, i) * 100) / 100 + ' ' + sizes[i] } /** * Format elapsed time to human-readable format * @param {number} milliseconds * @returns {string} */ function formatElapsedTime(milliseconds) { const totalSeconds = Math.floor(milliseconds / 1000) const hours = Math.floor(totalSeconds / 3600) const minutes = Math.floor((totalSeconds % 3600) / 60) const seconds = totalSeconds % 60 if (hours > 0) { return `${hours}h ${minutes}m ${seconds}s` } else if (minutes > 0) { return `${minutes}m ${seconds}s` } else { return `${seconds}s` } } /** * Normalize header values from files * @param {any} value - Header value * @param {number} index - Column index * @returns {string} Normalized header name */ function normalizeHeaderValue(value, index) { const raw = value === null || value === undefined ? '' : String(value) const trimmed = raw.replace(/^\uFEFF/, '').trim() return trimmed || `Column ${index + 1}` } /** * Validate file path and ensure the file exists, preventing path traversal attacks * @param {string} filePath - Path to file * @param {number} maxFileSizeMB - Maximum allowed file size in MB * @returns {Promise<string>} Resolved file path */ async function validateFilePath(filePath, maxFileSizeMB = MAX_FILE_SIZE_MB) { const { default: fs } = await import('fs') const { default: path } = await import('path') if (!filePath || typeof filePath !== 'string' || filePath.includes('\0')) { throw new Error(baseLite.bundle.getText("errInvalidFilePath", [filePath])) } // Prevent path traversal attacks const resolvedPath = path.resolve(filePath) const cwd = process.cwd() const relativePath = path.relative(cwd, resolvedPath) // Check for path traversal attempts if (relativePath.startsWith('..') || path.isAbsolute(relativePath)) { throw new Error(`Security violation: Access to file outside current directory is not allowed: ${filePath}`) } try { const stats = await fs.promises.stat(resolvedPath) if (!stats.isFile()) { throw new Error(baseLite.bundle.getText("errFileNotFound", [resolvedPath])) } // Check file size const fileSizeBytes = stats.size const fileSizeMB = fileSizeBytes / (1024 * 1024) if (fileSizeMB > maxFileSizeMB) { throw new Error(`File size (${fileSizeMB.toFixed(2)} MB) exceeds maximum allowed size (${maxFileSizeMB} MB)`) } } catch (error) { if (error.message.includes('exceeds maximum') || error.message.includes('Security violation') || error.message.includes('outside current directory')) { throw error } throw new Error(baseLite.bundle.getText("errFileNotFound", [resolvedPath])) } return resolvedPath } /** * Create CSV record iterator using streaming parser * @param {string} filePath - Path to CSV file * @param {number} maxFileSizeMB - Maximum allowed file size in MB * @returns {Promise<{iterator: AsyncGenerator<object>, getColumns: () => Array<string>}>} */ async function createCsvRecordIterator(filePath, maxFileSizeMB = MAX_FILE_SIZE_MB) { const { default: fs } = await import('fs') const resolvedPath = await validateFilePath(filePath, maxFileSizeMB) let columns = null const parser = parse({ bom: true, relax_quotes: false, trim: true, skip_empty_lines: true, relax_column_count: true, columns: (header) => { columns = header.map((value, index) => normalizeHeaderValue(value, index)) return columns } }) const stream = fs.createReadStream(resolvedPath) stream.on('error', (error) => parser.destroy(error)) const iterator = (async function* () { for await (const record of stream.pipe(parser)) { yield record } })() return { iterator, getColumns: () => columns } } /** * Normalize Excel cell values * @param {any} cellValue - ExcelJS cell value * @returns {any} Normalized value */ function normalizeExcelCellValue(cellValue) { if (cellValue === null || cellValue === undefined) { return null } if (cellValue instanceof Date) { return cellValue } if (typeof cellValue === 'object') { if (Object.prototype.hasOwnProperty.call(cellValue, 'text')) { return cellValue.text } if (Object.prototype.hasOwnProperty.call(cellValue, 'richText')) { return cellValue.richText.map(part => part.text).join('') } if (Object.prototype.hasOwnProperty.call(cellValue, 'formula') && Object.prototype.hasOwnProperty.call(cellValue, 'result')) { return cellValue.result } if (Object.prototype.hasOwnProperty.call(cellValue, 'hyperlink') && Object.prototype.hasOwnProperty.call(cellValue, 'text')) { return cellValue.text } return cellValue.result ?? cellValue.text ?? cellValue } return cellValue } /** * Create Excel record iterator using streaming reader * @param {string} filePath - Path to Excel file * @param {object} options - Excel reading options * @param {number} [options.worksheet] - Worksheet number to read (1-based) * @param {number} [options.startRow] - Starting row number (1-based, row 1 is header by default) * @param {boolean} [options.skipEmptyRows] - Skip rows with all empty values * @param {'cache'|'emit'|'ignore'} [options.cacheMode] - Shared strings cache mode * @param {number} [options.maxFileSizeMB] - Maximum allowed file size in MB * @returns {Promise<{iterator: AsyncGenerator<object>, getColumns: () => Array<string>}>} */ async function createExcelRecordIterator(filePath, options) { const maxFileSizeMB = options?.maxFileSizeMB || MAX_FILE_SIZE_MB const resolvedPath = await validateFilePath(filePath, maxFileSizeMB) const targetWorksheet = options?.worksheet || 1 const startRow = options?.startRow || 1 const skipEmptyRows = options?.skipEmptyRows !== false // default true /** @type {'cache'|'emit'|'ignore'} */ const cacheMode = options?.cacheMode || 'cache' const workbookReader = new ExcelJS.stream.xlsx.WorkbookReader(resolvedPath, { entries: 'emit', worksheets: 'emit', sharedStrings: cacheMode, styles: cacheMode === 'cache' ? 'cache' : 'ignore' }) let columns = null let currentWorksheetCount = 0 const iterator = (async function* () { for await (const worksheetReader of workbookReader) { currentWorksheetCount++ const worksheetId = /** @type {any} */ (worksheetReader)?.id // Skip worksheets until we reach the target one if (worksheetId && worksheetId !== targetWorksheet && currentWorksheetCount !== targetWorksheet) { continue } for await (const row of worksheetReader) { // Handle header row based on startRow setting if (row.number === startRow) { const values = Array.isArray(row.values) ? row.values.slice(1) : [] columns = values.map((value, index) => normalizeHeaderValue(value, index)) continue } // Skip rows before the startRow if (row.number < startRow) { continue } if (!columns) { continue } const values = Array.isArray(row.values) ? row.values : [] const record = {} columns.forEach((header, index) => { record[header] = normalizeExcelCellValue(values[index + 1]) }) // Check if row has any non-empty values const hasData = Object.values(record).some(v => v !== null && v !== undefined && v !== '') if (!skipEmptyRows || hasData) { yield record } } break // Only process the first matching worksheet } })() return { iterator, getColumns: () => columns } } /** * Determine if identifier is quoted * @param {string} identifier - SQL identifier * @returns {boolean} */ function isQuotedIdentifier(identifier) { return identifier.length >= 2 && identifier.startsWith('"') && identifier.endsWith('"') } /** * Unquote an identifier, validating escaped quotes * @param {string} identifier - Quoted identifier * @returns {string|null} */ function unquoteIdentifier(identifier) { if (!isQuotedIdentifier(identifier)) { return null } const inner = identifier.slice(1, -1) let result = '' for (let i = 0; i < inner.length; i++) { const char = inner[i] if (char === '"') { if (inner[i + 1] === '"') { result += '"' i++ } else { return null } } else if (char === '\n' || char === '\r' || char === '\0') { return null } else { result += char } } return result } /** * Validate SQL identifier (unquoted form) * @param {string} identifier - SQL identifier to validate * @returns {boolean} True if valid */ function isValidSQLIdentifier(identifier) { if (!identifier || typeof identifier !== 'string') { return false } // Allow alphanumeric, underscore, and specific special chars, but prevent SQL injection // HANA allows up to 127 characters for identifiers const validPattern = /^[A-Za-z_][A-Za-z0-9_$#]{0,126}$/ return validPattern.test(identifier) && !identifier.includes('--') && !identifier.includes('/*') } /** * Normalize identifier for database kind * @param {string} identifier - Identifier value * @param {string} dbKind - Database kind * @param {boolean} wasQuoted - Whether input was quoted * @returns {string} */ function normalizeIdentifierForDb(identifier, dbKind, wasQuoted) { if (wasQuoted) { return identifier } if (dbKind === 'hana') { return identifier.toUpperCase() } if (dbKind === 'postgres') { return identifier.toLowerCase() } return identifier } /** * Parse and validate identifier input * @param {string} identifier - Identifier input * @param {string} dbKind - Database kind * @returns {{name: string, quoted: boolean}} */ function parseIdentifier(identifier, dbKind) { if (!identifier || typeof identifier !== 'string') { return { name: '', quoted: false } } const trimmed = identifier.trim() if (isQuotedIdentifier(trimmed)) { const unquoted = unquoteIdentifier(trimmed) if (unquoted === null || unquoted === '') { return { name: '', quoted: false } } return { name: normalizeIdentifierForDb(unquoted, dbKind, true), quoted: true } } if (!isValidSQLIdentifier(trimmed)) { return { name: '', quoted: false } } return { name: normalizeIdentifierForDb(trimmed, dbKind, false), quoted: false } } /** * Split qualified name into parts, respecting quoted identifiers * @param {string} input - Qualified identifier * @returns {Array<string>} */ function splitQualifiedName(input) { const parts = [] let current = '' let inQuotes = false for (let i = 0; i < input.length; i++) { const char = input[i] const nextChar = input[i + 1] if (char === '"') { current += char if (inQuotes && nextChar === '"') { current += nextChar i++ } else { inQuotes = !inQuotes } continue } if (char === '.' && !inQuotes) { parts.push(current) current = '' continue } current += char } if (current) { parts.push(current) } return parts } /** * Parse qualified table input * @param {string} input - Qualified table name * @param {string} dbKind - Database kind * @returns {{schema: string|null, table: string}} */ function parseQualifiedTableName(input, dbKind) { const parts = splitQualifiedName(input) if (parts.length === 2) { const schemaInfo = parseIdentifier(parts[0], dbKind) const tableInfo = parseIdentifier(parts[1], dbKind) if (!schemaInfo.name || !tableInfo.name) { return { schema: null, table: '' } } return { schema: schemaInfo.name || null, table: tableInfo.name } } if (parts.length === 1) { const tableInfo = parseIdentifier(parts[0], dbKind) return { schema: null, table: tableInfo.name } } return { schema: null, table: '' } } /** * Quote SQL identifier safely * @param {string} identifier - Identifier value * @returns {string} */ function quoteIdentifier(identifier) { return `"${String(identifier).replace(/"/g, '""')}"` } /** * Format qualified table name * @param {string|null} schema - Schema name * @param {string} table - Table name * @returns {string} */ function formatQualifiedName(schema, table) { if (schema) { return `${quoteIdentifier(schema)}.${quoteIdentifier(table)}` } return quoteIdentifier(table) } /** * Resolve current schema for database kind * @param {object} dbClient - Database client * @param {string} dbKind - Database kind * @returns {Promise<string|null>} */ async function getCurrentSchema(dbClient, dbKind) { if (dbKind === 'postgres') { const result = await dbClient.execSQL('SELECT current_schema() AS CURRENT_SCHEMA') return result?.[0]?.CURRENT_SCHEMA ?? result?.[0]?.current_schema ?? null } if (dbKind === 'sqlite') { return null } const result = await dbClient.execSQL('SELECT CURRENT_SCHEMA FROM DUMMY') return result?.[0]?.CURRENT_SCHEMA ?? null } /** * Validate required columns are present in mapping * @param {Object} columnMapping - Mapping of file columns to table columns * @param {Object} tableMetadata - Table metadata */ function validateRequiredColumns(columnMapping, tableMetadata) { const mappedColumns = new Set(Object.values(columnMapping)) const missingRequired = Object.entries(tableMetadata.columns) .filter(([columnName, info]) => !info.nullable && !mappedColumns.has(columnName) && (info.defaultValue === null || info.defaultValue === undefined)) .map(([columnName]) => columnName) if (missingRequired.length > 0) { throw new Error(`Missing required columns: ${missingRequired.join(', ')}`) } } /** * Normalize value for database parameter binding * @param {any} value - Value to normalize * @param {Set<string>} nullValues - Set of values to treat as NULL * @returns {any} */ function normalizeValueForDb(value, nullValues = new Set(DEFAULT_NULL_VALUES)) { if (value === null || value === undefined) { return null } // Check against custom NULL values if (typeof value === 'string' && nullValues.has(value)) { return null } if (value instanceof Date) { return value.toISOString() } if (typeof value === 'boolean') { return value ? 1 : 0 } return value } /** * Get table metadata from database * @param {object} dbClient - Database client instance * @param {string} schema - Schema name * @param {string} table - Table name * @returns {Promise<Object>} Table metadata with columns */ async function getTableMetadata(dbClient, schema, table, dbKind) { const base = await import('../utils/base.js') if (!table) { throw new Error(baseLite.bundle.getText("errInvalidTable", [table])) } const normalizedKind = (dbKind || '').toLowerCase() let query = '' let params = [] if (normalizedKind === 'postgres') { query = ` SELECT column_name AS COLUMN_NAME, ordinal_position AS POSITION, data_type AS DATA_TYPE, is_nullable AS NULLABLE, column_default AS DEFAULT_VALUE FROM information_schema.columns WHERE table_schema = ? AND table_name = ? ORDER BY ordinal_position ` params = [schema, table] } else if (normalizedKind === 'sqlite') { query = ` SELECT name AS COLUMN_NAME, (cid + 1) AS POSITION, type AS DATA_TYPE, notnull AS NOTNULL, dflt_value AS DEFAULT_VALUE FROM pragma_table_info(?) ORDER BY cid ` params = [table] } else { if (!schema) { throw new Error(baseLite.bundle.getText("errInvalidSchema", [schema])) } query = ` SELECT COLUMN_NAME, POSITION, DATA_TYPE_NAME AS DATA_TYPE, IS_NULLABLE AS NULLABLE, DEFAULT_VALUE FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = ? AND TABLE_NAME = ? ORDER BY POSITION ` params = [schema, table] } const columns = await dbClient.execSQL(query, params) if (!columns || columns.length === 0) { throw new Error(baseLite.bundle.getText("errTableNotFound", [schema || '', table])) } const normalizedColumns = columns.reduce((acc, col) => { const columnName = col.COLUMN_NAME ?? col.column_name ?? col.name const position = Number(col.POSITION ?? col.ordinal_position ?? col.position ?? col.cid ?? 0) || 0 const dataType = col.DATA_TYPE ?? col.DATA_TYPE_NAME ?? col.data_type ?? col.type ?? '' const defaultValue = col.DEFAULT_VALUE ?? col.column_default ?? col.dflt_value let nullable = true if (typeof col.NULLABLE === 'string') { nullable = col.NULLABLE.toUpperCase() === 'TRUE' || col.NULLABLE.toUpperCase() === 'YES' } else if (typeof col.NOTNULL === 'number') { nullable = col.NOTNULL === 0 } else if (typeof col.notnull === 'number') { nullable = col.notnull === 0 } else if (typeof col.IS_NULLABLE === 'string') { nullable = col.IS_NULLABLE.toUpperCase() === 'YES' } acc[columnName] = { position, dataType, nullable, defaultValue } return acc }, {}) base.debug(baseLite.bundle.getText("debug.tableColumnsLoaded", [columns.length])) return { schema, table, columns: normalizedColumns } } /** * Match file columns to table columns * @param {Array<string>} fileColumns - Column names from file * @param {Object} tableMetadata - Table metadata with columns * @param {string} matchMode - Matching mode: 'order', 'name', or 'auto' * @returns {Object} Mapping of file columns to table columns */ function matchColumns(fileColumns, tableMetadata, matchMode) { const tableColumns = Object.keys(tableMetadata.columns) const tableColumnsUpper = new Map(tableColumns.map(col => [col.toUpperCase(), col])) const tableColumnsExact = new Set(tableColumns) const mapping = {} if (matchMode === 'order') { // Match by position order fileColumns.forEach((fileCol, index) => { if (index < tableColumns.length) { const normalizedFileCol = typeof fileCol === 'string' ? fileCol.trim() : String(fileCol) mapping[normalizedFileCol] = tableColumns[index] } }) } else if (matchMode === 'name' || matchMode === 'auto') { // Try to match by name (case-insensitive) fileColumns.forEach((fileCol) => { const normalizedFileCol = typeof fileCol === 'string' ? fileCol.trim() : String(fileCol) const unquotedFileCol = isQuotedIdentifier(normalizedFileCol) ? (unquoteIdentifier(normalizedFileCol) ?? normalizedFileCol) : normalizedFileCol const exactMatch = tableColumnsExact.has(unquotedFileCol) ? unquotedFileCol : null const match = exactMatch || tableColumnsUpper.get(unquotedFileCol.toUpperCase()) if (match) { mapping[normalizedFileCol] = match } else if (matchMode === 'auto') { // In auto mode, if no name match, use position const fileIndex = fileColumns.indexOf(fileCol) if (fileIndex < tableColumns.length) { mapping[normalizedFileCol] = tableColumns[fileIndex] } } }) } return mapping } /** * Convert and validate data based on column data types * @param {object} record - Data record * @param {Object} columnMapping - Mapping of file columns to table columns * @param {Object} tableMetadata - Table metadata with data types * @param {Set<string>} nullValues - Set of values to treat as NULL * @returns {object} Converted record with values */ function convertDataTypes(record, columnMapping, tableMetadata, nullValues = new Set(DEFAULT_NULL_VALUES)) { const converted = {} for (const [fileCol, tableCol] of Object.entries(columnMapping)) { const value = record[fileCol] const columnInfo = tableMetadata.columns[tableCol] // Check if value should be treated as NULL const isNullValue = value === null || value === undefined || value === '' || (typeof value === 'string' && nullValues.has(value)) if (isNullValue) { // Check if column is nullable if (!columnInfo.nullable) { throw new Error(`Column ${tableCol} is NOT NULL but received empty value`) } converted[tableCol] = null continue } const dataType = String(columnInfo.dataType || '').toUpperCase() try { switch (true) { case dataType.includes('INT'): { const parsed = typeof value === 'number' ? value : parseInt(String(value).trim(), 10) if (isNaN(parsed)) { throw new Error(`Invalid integer value for column ${tableCol}: ${value}`) } converted[tableCol] = parsed break } case dataType.includes('DECIMAL') || dataType.includes('NUMERIC') || dataType.includes('REAL') || dataType.includes('DOUBLE'): { const parsed = typeof value === 'number' ? value : parseFloat(String(value).trim()) if (isNaN(parsed)) { throw new Error(`Invalid numeric value for column ${tableCol}: ${value}`) } converted[tableCol] = parsed break } case dataType.includes('BOOLEAN'): if (typeof value === 'string') { const normalized = value.trim().toLowerCase() if (['true', '1', 'yes', 'y', 't'].includes(normalized)) { converted[tableCol] = true } else if (['false', '0', 'no', 'n', 'f'].includes(normalized)) { converted[tableCol] = false } else { throw new Error(`Invalid boolean value for column ${tableCol}: ${value}`) } } else { converted[tableCol] = Boolean(value) } break case dataType.includes('DATE'): { if (value instanceof Date) { converted[tableCol] = isNaN(value.getTime()) ? null : value } else { const parsed = new Date(String(value)) if (isNaN(parsed.getTime())) { throw new Error(`Invalid date value for column ${tableCol}: ${value}`) } converted[tableCol] = parsed } break } case dataType.includes('TIMESTAMP'): { if (value instanceof Date) { converted[tableCol] = isNaN(value.getTime()) ? null : value } else { const parsed = new Date(String(value)) if (isNaN(parsed.getTime())) { throw new Error(`Invalid timestamp value for column ${tableCol}: ${value}`) } converted[tableCol] = parsed } break } default: converted[tableCol] = String(value) } } catch (error) { // Re-throw validation errors throw error } } return converted } /** * Build INSERT statement for a single record * @param {string} schema - Schema name * @param {string} table - Table name * @param {object} record - Data record with converted types * @returns {{sql: string, values: Array<any>}} INSERT SQL statement and values */ function buildInsertStatement(schema, table, record) { return buildBatchInsertStatement(schema, table, [record]) } /** * Build batch INSERT statement for multiple records * @param {string} schema - Schema name * @param {string} table - Table name * @param {Array<object>} records - Array of data records with converted types * @param {Set<string>} nullValues - Set of values to treat as NULL * @returns {{sql: string, values: Array<any>}} Batch INSERT SQL statement and values */ function buildBatchInsertStatement(schema, table, records, nullValues = new Set(DEFAULT_NULL_VALUES)) { if (!records || records.length === 0) { throw new Error('No records provided for batch insert') } const columns = Object.keys(records[0]) const values = [] const valuesClause = records.map(record => { const rowPlaceholders = columns.map(col => { values.push(normalizeValueForDb(record[col], nullValues)) return '?' }) return `(${rowPlaceholders.join(', ')})` }).join(', ') const qualifiedName = formatQualifiedName(schema, table) const columnsClause = columns.map(col => quoteIdentifier(col)).join(', ') const sql = `INSERT INTO ${qualifiedName} (${columnsClause}) VALUES ${valuesClause}` return { sql, values } } /** * Import data from file into database table * @param {object} prompts - Input prompts with filename, table, output format, etc * @returns {Promise<object>} Import result summary */ export async function importData(prompts) { const base = await import('../utils/base.js') base.debug('importData') // Validate parameters const timeoutSeconds = prompts.timeoutSeconds || DEFAULT_TIMEOUT_SECONDS const maxFileSizeMB = prompts.maxFileSizeMB || MAX_FILE_SIZE_MB const nullValues = parseNullValues(prompts.nullValues) const dryRun = prompts.dryRun || false const skipWithErrors = prompts.skipWithErrors || false const maxErrorsAllowed = prompts.maxErrorsAllowed || -1 // -1 = unlimited try { base.setPrompts(prompts) // Set operation timeout (if supported by runtime) const abortController = new AbortController() const timeoutHandle = timeoutSeconds > 0 ? setTimeout(() => abortController.abort(), timeoutSeconds * 1000) : null // Connect to database const dbClient = await dbClientClass.getNewClient(prompts) await dbClient.connect() const dbKind = (dbClient.getKind() || 'hana').toLowerCase() // Parse table name (format: schema.table or just table) const parsedTable = parseQualifiedTableName(prompts.table, dbKind) let schema = parsedTable.schema let table = parsedTable.table if (!table) { throw new Error(baseLite.bundle.getText("errInvalidTableFormat", [prompts.table])) } if (!schema && prompts.schema && prompts.schema !== '**CURRENT_SCHEMA**') { const parsedSchema = parseIdentifier(prompts.schema, dbKind) if (!parsedSchema.name) { throw new Error(baseLite.bundle.getText("errInvalidSchema", [prompts.schema])) } schema = parsedSchema.name } if (!schema && dbKind !== 'sqlite') { const currentSchema = await getCurrentSchema(dbClient, dbKind) if (!currentSchema) { throw new Error(baseLite.bundle.getText("errNoSchemaSpecified")) } schema = currentSchema } if (dbKind === 'sqlite' && schema) { base.debug(`Ignoring schema for sqlite: ${schema}`) schema = null } // Read file data as stream base.debug(baseLite.bundle.getText("debug.readingFile", [prompts.filename])) const iteratorInfo = prompts.output === 'excel' ? await createExcelRecordIterator(prompts.filename, { worksheet: prompts.worksheet || 1, startRow: prompts.startRow || 1, skipEmptyRows: prompts.skipEmptyRows !== false, cacheMode: prompts.excelCacheMode || 'cache', maxFileSizeMB: maxFileSizeMB }) : await createCsvRecordIterator(prompts.filename, maxFileSizeMB) const recordIterator = iteratorInfo.iterator // Get table metadata const tableMetadata = await getTableMetadata(dbClient, schema, table, dbKind) base.debug(baseLite.bundle.getText("debug.tableMetadataLoaded")) // Start transaction when supported by the active DB kind base.debug(baseLite.bundle.getText("debug.startingTransaction")) const shouldUseExplicitTransaction = dbKind !== 'hana' if (shouldUseExplicitTransaction) { await dbClient.execSQL('BEGIN') } else { base.debug('Skipping explicit BEGIN for HANA import flow') } // Log truncate operation for audit trail const operationLog = [] const startTime = Date.now() // Initialize counters outside try block for access after transaction let successCount = 0 let errorCount = 0 let rowsProcessed = 0 const errors = [] let BATCH_SIZE = DEFAULT_BATCH_SIZE try { // Truncate table if requested if (prompts.truncate) { base.debug(baseLite.bundle.getText("debug.truncatingTable")) const truncateStatement = dbKind === 'sqlite' ? `DELETE FROM ${formatQualifiedName(schema, table)}` : `TRUNCATE TABLE ${formatQualifiedName(schema, table)}` await dbClient.execSQL(truncateStatement) const auditMsg = `[${new Date().toISOString()}] TRUNCATE ${schema ? schema + '.' : ''}${table}` operationLog.push(auditMsg) base.debug(`Audit: ${auditMsg}`) console.log(baseLite.bundle.getText("tablesTruncated", [table])) } // Insert data in batches for better performance base.debug(baseLite.bundle.getText("debug.startingImport")) BATCH_SIZE = prompts.batchSize || DEFAULT_BATCH_SIZE // Validate batch size if (BATCH_SIZE < 1 || BATCH_SIZE > 10000) { throw new Error(`Batch size must be between 1 and 10000, got: ${BATCH_SIZE}`) } base.debug(`Using batch size: ${BATCH_SIZE}`) let fileColumns = null let columnMapping = null const batch = [] let lastProgressUpdate = startTime let estimatedRowSize = 0 const PROGRESS_UPDATE_INTERVAL = 5000 // Update progress every 5 seconds const MEMORY_CHECK_INTERVAL = 100 // Check memory every 100 rows let rowsSinceMemCheck = 0 const addError = (row, message) => { errorCount++ if (errors.length < MAX_ERROR_DETAILS) { errors.push({ row, error: message }) } } const logProgress = (force = false) => { const now = Date.now() if (force || (now - lastProgressUpdate > PROGRESS_UPDATE_INTERVAL)) { const elapsedSeconds = (now - startTime) / 1000 const rate = rowsProcessed > 0 ? (rowsProcessed / elapsedSeconds).toFixed(1) : '0' const memUsage = formatBytes(process.memoryUsage().heapUsed) const progress = `Processed: ${rowsProcessed} rows | Inserted: ${successCount} | Errors: ${errorCount} | Rate: ${rate} rows/sec | Memory: ${memUsage} | Elapsed: ${formatElapsedTime(now - startTime)}` base.debug(progress) lastProgressUpdate = now if (base.verboseOutput(prompts)) { console.log(` ${progress}`) } } } const maybeAdjustBatchSize = (row) => { rowsSinceMemCheck++ if (rowsSinceMemCheck >= MEMORY_CHECK_INTERVAL) { if (estimatedRowSize === 0) { estimatedRowSize = JSON.stringify(row).length } const newBatchSize = calcOptimalBatchSize(BATCH_SIZE, estimatedRowSize) if (newBatchSize !== BATCH_SIZE) { base.debug(`Adjusting batch size from ${BATCH_SIZE} to ${newBatchSize} (memory optimization)`) BATCH_SIZE = newBatchSize } rowsSinceMemCheck = 0 } } const insertBatchRecursive = async (batchItems) => { if (!batchItems || batchItems.length === 0) { return } try { const { sql, values } = buildBatchInsertStatement(schema, table, batchItems.map(item => item.record), nullValues) if (!dryRun) { await dbClient.execSQL(sql, values) } successCount += batchItems.length base.debug(baseLite.bundle.getText("debug.batchInserted", [batchItems.length, successCount])) } catch (error) { base.debug(baseLite.bundle.getText("debug.batchFailed", [error.message])) if (batchItems.length === 1) { addError(batchItems[0].originalIndex, error.message) base.debug(baseLite.bundle.getText("debug.rowInsertFailed", [batchItems[0].originalIndex, error.message])) return } const mid = Math.ceil(batchItems.length / 2) await insertBatchRecursive(batchItems.slice(0, mid)) await insertBatchRecursive(batchItems.slice(mid)) } } for await (const record of recordIterator) { rowsProcessed++ if (!fileColumns) { fileColumns = Object.keys(record) columnMapping = matchColumns(fileColumns, tableMetadata, prompts.matchMode) base.debug(baseLite.bundle.getText("debug.columnsMatched", [Object.keys(columnMapping).length])) if (Object.keys(columnMapping).length === 0) { throw new Error(baseLite.bundle.getText("errNoColumnsMatched")) } validateRequiredColumns(columnMapping, tableMetadata) } try { const convertedRecord = convertDataTypes(record, columnMapping, tableMetadata, nullValues) batch.push({ record: convertedRecord, originalIndex: rowsProcessed }) maybeAdjustBatchSize(record) logProgress() } catch (error) { addError(rowsProcessed, error.message) base.debug(baseLite.bundle.getText("debug.rowValidationFailed", [rowsProcessed, error.message])) // Check if we should continue despite error threshold if (maxErrorsAllowed >= 0 && errorCount > maxErrorsAllowed && !skipWithErrors) { throw new Error(`Error threshold exceeded: ${errorCount} errors (max: ${maxErrorsAllowed})`) } } if (batch.length >= BATCH_SIZE) { await insertBatchRecursive(batch.splice(0, batch.length)) } // Check error threshold if (maxErrorsAllowed > 0 && errorCount >= maxErrorsAllowed && !skipWithErrors) { throw new Error(`Error threshold exceeded: ${errorCount} errors (max: ${maxErrorsAllowed})`) } } if (batch.length > 0) { await insertBatchRecursive(batch) } // Final progress update logProgress(true) if (rowsProcessed === 0) { base.error(baseLite.bundle.getText("errNoDataInFile")) await dbClient.execSQL('ROLLBACK') await dbClient.disconnect() return { success: false, rowsProcessed: 0 } } // Commit transaction (or rollback if dry-run) for DBs using explicit transactions if (shouldUseExplicitTransaction) { if (dryRun) { await dbClient.execSQL('ROLLBACK') base.debug('Dry-run completed - transaction rolled back') } else { await dbClient.execSQL('COMMIT') base.debug(baseLite.bundle.getText("debug.transactionCommitted")) } } else if (dryRun) { base.debug('Dry-run requested on HANA without explicit transaction support; skipping commit') } } catch (error) { // Rollback on error for DBs using explicit transactions if (shouldUseExplicitTransaction) { base.debug(baseLite.bundle.getText("debug.rollingBack")) await dbClient.execSQL('ROLLBACK') } throw error } await dbClient.disconnect() // Display results const elapsedTime = Date.now() - startTime const result = { success: (dryRun || errorCount === 0) && !skipWithErrors, rowsProcessed: rowsProcessed, rowsInserted: successCount, rowsWithErrors: errorCount, table: schema ? `${schema}.${table}` : table, matchMode: prompts.matchMode, truncated: prompts.truncate || false, batchSize: BATCH_SIZE, dryRun: dryRun, operationLog: operationLog, executionTimeMs: elapsedTime, executionTime: formatElapsedTime(elapsedTime), throughput: rowsProcessed > 0 ? (rowsProcessed / (elapsedTime / 1000)).toFixed(2) : 0 } // Add Excel-specific info if applicable if (prompts.output === 'excel') { result.excelWorksheet = prompts.worksheet || 1 result.excelStartRow = prompts.startRow || 1 result.excelSkipEmptyRows = prompts.skipEmptyRows !== false } // Cleanup timeout if set if (timeoutHandle) { clearTimeout(timeoutHandle) } console.log(`\n${baseLite.bundle.getText("importSummary")}`) base.outputTableFancy([result]) // Show memory stats const memStats = process.memoryUsage() console.log(`\nMemory Usage:`) console.log(` Heap Used: ${formatBytes(memStats.heapUsed)} / ${formatBytes(memStats.heapTotal)}`) console.log(` External: ${formatBytes(memStats.external)}`) if (errorCount > 0 && errors.length > 0) { console.log(`\n${baseLite.bundle.getText("importErrors")}`) console.log(JSON.stringify(errors.slice(0, 10), null, 2)) if (errors.length > 10) { console.log(`... and ${errors.length - 10} more errors`) } } return result } catch (error) { // Handle timeout errors specially if (error instanceof DOMException && error.name === 'AbortError') { const errorMsg = `Import operation timed out after ${timeoutSeconds} seconds` await base.error(new Error(errorMsg)) return { success: false, rowsProcessed: 0, timedOut: true } } await base.error(error) return { success: false, rowsProcessed: 0 } } }