UNPKG

autosql

Version:

An auto-parser of JSON into SQL.

420 lines (419 loc) 19.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.initializeMetaData = initializeMetaData;
exports.getDataHeaders = getDataHeaders;
exports.getMetaData = getMetaData;
exports.compareMetaData = compareMetaData;
const utilities_1 = require("./utilities");
const groupings_1 = require("../config/groupings");
const columnTypes_1 = require("./columnTypes");
const columnTypes_2 = require("./columnTypes");
const defaults_1 = require("../config/defaults");
const keys_1 = require("./keys");
const database_1 = require("../db/database");
const mysqlConfig_1 = require("../db/config/mysqlConfig");
const pgsqlConfig_1 = require("../db/config/pgsqlConfig");

/**
 * Builds a fresh column descriptor with every flag cleared.
 * A new object is returned on each call so columns never share state.
 */
function createColumnMetaData() {
    return {
        type: null,
        length: 0,
        allowNull: false,
        unique: false,
        index: false,
        pseudounique: false,
        primary: false,
        autoIncrement: false,
        default: undefined,
        decimal: 0
    };
}

/**
 * Returns true for the values this module treats as "no value":
 * empty string, null, undefined, the two-character string `\N` and the string 'null'.
 */
function isBlankValue(value) {
    return value === '' || value === null || value === undefined || value === '\\N' || value === 'null';
}

/** Own-property check that stays safe when a column is named like an Object.prototype member. */
function hasOwn(target, key) {
    return Object.prototype.hasOwnProperty.call(target, key);
}

/**
 * Widens the running length / decimal statistics of one column so that `value` fits.
 *
 * For types listed in `groupings.intGroup` or `groupings.specialIntGroup` the value is
 * split on "." and integer and fractional digit counts are tracked separately; the
 * stored `decimal` is capped at `databaseConfig.decimalMaxLength || 10` while
 * `trueMaxDecimal` keeps the uncapped maximum. For every other type the length of
 * `String(value)` is used.
 *
 * @param {object} stats - Interim statistics of the column (mutated in place).
 * @param {*} value - Raw, non-blank cell value.
 * @param {string} type - Type used to decide between numeric and textual measurement.
 * @param {object} databaseConfig - Configuration; only `decimalMaxLength` is read here.
 */
function trackValueLength(stats, value, type, databaseConfig) {
    const isNumericGroup = groupings_1.groupings.intGroup.includes(type) || groupings_1.groupings.specialIntGroup.includes(type);
    if (!isNumericGroup) {
        stats.length = Math.max(stats.length, String(value).length);
        return;
    }
    let valueStr = (0, utilities_1.normalizeNumber)(value);
    if (!valueStr) {
        // Fall back to the raw text when the helper yields nothing usable.
        valueStr = String(value).trim();
    }
    const decimalLen = valueStr.includes(".") ? valueStr.split(".")[1].length : 0;
    const integerLen = valueStr.split(".")[0].length;
    stats.decimal = Math.max(stats.decimal, decimalLen);
    stats.trueMaxDecimal = Math.max(stats.trueMaxDecimal, stats.decimal, decimalLen);
    stats.decimal = Math.min(stats.decimal, databaseConfig.decimalMaxLength || 10);
    stats.length = Math.max(stats.length, integerLen + stats.decimal);
}

/**
 * Creates one single-key object per header, each mapping the header name to an
 * empty column descriptor.
 *
 * @param {string[]} headers - Column names.
 * @returns {object[]} Array of `{ [header]: descriptor }` objects, in input order.
 * @throws {Error} Wrapped error (original attached as `cause`) if mapping fails.
 */
function initializeMetaData(headers) {
    try {
        return headers.map(header => ({ [header]: createColumnMetaData() }));
    }
    catch (error) {
        throw new Error(`Error in initializeMetaData: ${error}`, { cause: error });
    }
}

/**
 * Scans `data` and derives per-column metadata (type, length, decimal, nullability,
 * uniqueness flags).
 *
 * When both `sampling` and `samplingMinimum` are configured and the data set is larger
 * than `samplingMinimum`, types and uniqueness are inferred from a shuffled sample
 * only; the remaining rows are then used solely to widen lengths/decimals and to
 * record nullability.
 *
 * @param {object[]} data - Rows as plain objects keyed by column name.
 * @param {object} databaseConfig - Reads `sampling`, `samplingMinimum`, `sqlDialect`,
 *   `decimalMaxLength`, `pseudoUnique`, `categorical`, `maxKeyLength`,
 *   `maxVarcharLength` and `excludeBlankColumns`.
 * @returns {Promise<object>} Column descriptors keyed by column name.
 * @throws {Error} If only one of `sampling` / `samplingMinimum` is provided.
 */
async function getDataHeaders(data, databaseConfig) {
    const sampling = databaseConfig.sampling;
    const samplingMinimum = databaseConfig.samplingMinimum;
    const metaData = {};
    const metaDataInterim = {};
    const allColumns = new Set();
    if ((sampling !== undefined || samplingMinimum !== undefined) && (sampling === undefined || samplingMinimum === undefined)) {
        throw new Error("Both sampling percentage and sampling minimum must be provided together.");
    }
    const dialect = databaseConfig.sqlDialect;
    const db = database_1.Database.create({ sqlDialect: dialect });
    const dialectConfig = db.getDialectConfig();

    let sampleData = data;
    let remainingData = [];
    if (sampling !== undefined && sampling > 0 && samplingMinimum !== undefined && data.length > samplingMinimum) {
        // Never sample fewer rows than the configured minimum.
        const sampleSize = Math.max(Math.round(data.length * sampling), samplingMinimum);
        const shuffledData = (0, utilities_1.shuffleArray)(data);
        sampleData = shuffledData.slice(0, sampleSize);
        remainingData = shuffledData.slice(sampleSize);
    }

    // Pass 1: full inspection of the sample rows.
    for (const row of sampleData) {
        for (const column of Object.keys(row)) {
            allColumns.add(column);
        }
        // Iterate every column seen so far so that keys missing from this row count as blanks.
        for (const column of allColumns) {
            const value = row[column];
            if (metaData[column] === undefined) {
                metaData[column] = createColumnMetaData();
            }
            if (metaDataInterim[column] === undefined) {
                metaDataInterim[column] = {
                    uniqueSet: new Set(),
                    valueCount: 0,
                    nullCount: 0,
                    types: new Set(),
                    length: 0,
                    decimal: 0,
                    trueMaxDecimal: 0
                };
            }
            const stats = metaDataInterim[column];
            if (isBlankValue(value)) {
                metaData[column].allowNull = true;
                stats.nullCount++;
                continue;
            }
            const type = (0, columnTypes_2.predictType)(value);
            if (!type)
                continue;
            const sqlizedValue = (0, utilities_1.sqlize)(value, type, dialectConfig, databaseConfig);
            stats.valueCount++;
            stats.uniqueSet.add(sqlizedValue);
            stats.types.add(type);
            trackValueLength(stats, value, type, databaseConfig);
        }
    }

    // Collate the sampled observations into one descriptor per column.
    for (const column in metaDataInterim) {
        const stats = metaDataInterim[column];
        const columnMeta = metaData[column];
        const type = (0, columnTypes_1.collateTypes)(stats.types);
        stats.collated_type = type;
        columnMeta.type = type;
        columnMeta.length = stats.length || 0;
        columnMeta.decimal = stats.decimal || 0;

        const distinctCount = stats.uniqueSet.size;
        // NaN when the column had no typed values; every comparison below is then false.
        const uniquePercentage = distinctCount / stats.valueCount;
        if (uniquePercentage === 1 && distinctCount > 0) {
            columnMeta.unique = true;
        }
        else if (uniquePercentage >= (databaseConfig.pseudoUnique || defaults_1.defaults.pseudoUnique) && distinctCount > 0) {
            columnMeta.pseudounique = true;
        }
        else if (uniquePercentage <= (databaseConfig.categorical || defaults_1.defaults.categorical) && distinctCount > 0 && !defaults_1.nonCategoricalTypes.includes(type)) {
            columnMeta.categorical = true;
        }
        else if (distinctCount === 1 && stats.nullCount === 0 && stats.valueCount > 0) {
            columnMeta.singleValue = true;
        }
        if (stats.nullCount !== 0) {
            columnMeta.allowNull = true;
        }
        // Values longer than the maximum key length cannot carry a unique flag.
        if (columnMeta.length > (databaseConfig.maxKeyLength || defaults_1.defaults.maxKeyLength) && columnMeta.unique) {
            columnMeta.unique = false;
        }
        if (columnMeta.type === 'varchar' && columnMeta.length > (databaseConfig.maxVarcharLength || defaults_1.defaults.maxVarcharLength)) {
            columnMeta.type = 'text';
        }
    }

    // Pass 2: rows outside the sample only widen lengths and record nullability.
    for (const row of remainingData) {
        for (const column of allColumns) {
            const value = row[column];
            if (isBlankValue(value)) {
                // Blank values must not be measured (String(null) would count as 4
                // characters), but the column evidently has to accept NULL.
                metaData[column].allowNull = true;
                continue;
            }
            const type = metaData[column].type;
            if (!type)
                continue;
            trackValueLength(metaDataInterim[column], value, type, databaseConfig);
        }
    }

    for (const column in metaDataInterim) {
        const stats = metaDataInterim[column];
        // Types outside dialectConfig.decimals store the whole number as length:
        // add one character for the dot and swap the capped decimal count for the
        // true maximum, then clear the decimal part.
        if (!dialectConfig.decimals.includes(stats.collated_type || 'varchar')) {
            stats.length = stats.length + (stats.decimal > 0 ? 1 : 0) - stats.decimal + stats.trueMaxDecimal;
            stats.decimal = 0;
        }
        metaData[column].length = stats.length || 0;
        metaData[column].decimal = stats.decimal || 0;
    }

    if (databaseConfig.excludeBlankColumns) {
        // Drop columns for which the sample contained blanks only.
        const emptyOrNullKeys = Object.entries(metaDataInterim)
            .filter(([, meta]) => meta.uniqueSet.size === 0 && meta.valueCount === 0 && meta.nullCount > 0)
            .map(([key]) => key);
        for (const key of emptyOrNullKeys) {
            delete metaData[key];
        }
    }
    return metaData;
}

/**
 * Validates the configuration, infers column metadata from `data` and, when
 * `autoIndexing` is enabled, passes the result through `predictIndexes`.
 *
 * @param {object} databaseOrConfig - Either a `Database` instance or a raw config object.
 * @param {object[]} data - Rows to analyse.
 * @param {*} [primaryKey] - Overrides `primaryKey` from the config when truthy.
 * @returns {Promise<object>} Column descriptors keyed by column name.
 * @throws {Error} Wrapped error (original attached as `cause`), e.g. for an
 *   unsupported SQL dialect.
 */
async function getMetaData(databaseOrConfig, data, primaryKey) {
    try {
        let validatedConfig;
        if (databaseOrConfig instanceof database_1.Database) {
            // Reuse the configuration held by the existing Database instance.
            validatedConfig = (0, utilities_1.validateConfig)(databaseOrConfig.getConfig());
        }
        else {
            validatedConfig = (0, utilities_1.validateConfig)(databaseOrConfig);
            if (validatedConfig.sqlDialect !== 'mysql' && validatedConfig.sqlDialect !== 'pgsql') {
                throw new Error(`Unsupported SQL dialect: ${validatedConfig.sqlDialect}`);
            }
        }
        const sqlDialect = validatedConfig.sqlDialect;
        if (!sqlDialect) {
            throw new Error(`Unsupported SQL dialect: ${sqlDialect}`);
        }
        const headers = await getDataHeaders(data, validatedConfig);
        if (!validatedConfig.autoIndexing) {
            return headers;
        }
        return (0, keys_1.predictIndexes)(headers, validatedConfig.maxKeyLength, primaryKey || validatedConfig.primaryKey, data);
    }
    catch (error) {
        throw new Error(`Error in getMetaData: ${error}`, { cause: error });
    }
}

/**
 * Diffs two metadata maps and produces both the list of schema changes and the merged
 * metadata.
 *
 * Columns are only ever widened: types are merged through `collateTypes`, lengths and
 * decimals take the maximum, and NOT NULL may become NULL but not the reverse.
 *
 * @param {object|null|undefined} oldHeadersOriginal - Existing metadata; when falsy the
 *   new metadata is returned unchanged with an empty change set.
 * @param {object} newHeadersOriginal - Freshly inferred metadata.
 * @param {object} [dialectConfig] - Dialect description; `noLength` and `decimals` are read.
 * @returns {{changes: object, updatedMetaData: object}} Change lists and merged metadata.
 * @throws {Error} If a merged column ends up without a type.
 */
function compareMetaData(oldHeadersOriginal, newHeadersOriginal, dialectConfig) {
    if (!oldHeadersOriginal) {
        return {
            changes: {
                addColumns: {},
                modifyColumns: {},
                dropColumns: [],
                renameColumns: [],
                nullableColumns: [],
                noLongerUnique: [],
                primaryKeyChanges: [],
            },
            updatedMetaData: newHeadersOriginal
        };
    }
    // Work on deep copies so that the caller's objects are never mutated.
    const newHeaders = JSON.parse(JSON.stringify(newHeadersOriginal));
    const oldHeaders = JSON.parse(JSON.stringify(oldHeadersOriginal));
    const addColumns = {};
    const modifyColumns = {};
    const renameColumns = [];
    const nullableColumns = [];
    const noLongerUnique = [];
    const renamedPrimaryKeys = [];
    let primaryKeyChanges = [];

    // Removed columns: present before, absent now.
    const dropColumns = Object.keys(oldHeaders).filter(name => !hasOwn(newHeaders, name));

    // Renamed columns: a dropped column whose descriptor is identical to that of a
    // brand-new column. Each old column is paired with at most one new column.
    for (const oldColumnName of Object.keys(oldHeaders)) {
        if (hasOwn(newHeaders, oldColumnName))
            continue;
        const oldColumn = oldHeaders[oldColumnName];
        const oldSignature = JSON.stringify(oldColumn);
        for (const newColumnName of Object.keys(newHeaders)) {
            if (hasOwn(oldHeaders, newColumnName))
                continue;
            const newColumn = newHeaders[newColumnName];
            if (JSON.stringify(newColumn) !== oldSignature)
                continue;
            renameColumns.push({ oldName: oldColumnName, newName: newColumnName });
            if (oldColumn.primary && newColumn.primary) {
                renamedPrimaryKeys.push({ oldName: oldColumnName, newName: newColumnName });
            }
            const dropIndex = dropColumns.indexOf(oldColumnName);
            if (dropIndex !== -1) {
                dropColumns.splice(dropIndex, 1);
            }
            // The new column is accounted for; keep it out of the add/modify pass.
            delete newHeaders[newColumnName];
            break;
        }
    }

    // Added and modified columns.
    for (const [columnName, newColumn] of Object.entries(newHeaders)) {
        if (!hasOwn(oldHeaders, columnName)) {
            addColumns[columnName] = newColumn;
            continue;
        }
        const oldColumn = oldHeaders[columnName];
        const modifiedColumn = { ...oldColumn };
        let modified = false;
        const oldType = oldColumn.type ?? "varchar";
        const newType = newColumn.type ?? "varchar";
        // Merge both types into one type via collateTypes.
        const recommendedType = (0, columnTypes_1.collateTypes)([oldType, newType]);
        if (recommendedType !== oldType) {
            console.warn(`🔄 Converting ${columnName}: ${oldType} → ${recommendedType}`);
            modified = true;
        }
        modifiedColumn.type = recommendedType;
        modifiedColumn.previousType = oldType;

        const oldLength = oldColumn.length ?? 0;
        const newLength = newColumn.length ?? 0;
        const oldDecimal = oldColumn.decimal ?? 0;
        const newDecimal = newColumn.decimal ?? 0;
        const effectiveType = modifiedColumn.type || newColumn.type || oldColumn.type || "varchar";
        if (dialectConfig?.noLength.includes(effectiveType)) {
            // Types listed in noLength carry neither length nor decimal.
            delete modifiedColumn.length;
            delete modifiedColumn.decimal;
        }
        else if (dialectConfig?.decimals.includes(effectiveType)) {
            // Merge integer and fractional parts independently, then recombine.
            const maxPreDecimal = Math.max(oldLength - oldDecimal, newLength - newDecimal);
            const maxDecimal = Math.max(oldDecimal, newDecimal);
            modifiedColumn.length = maxPreDecimal + maxDecimal;
            modifiedColumn.decimal = maxDecimal;
        }
        else {
            modifiedColumn.length = Math.max(oldLength, newLength);
            delete modifiedColumn.decimal;
        }

        // NOT NULL may be relaxed to NULL, never the other way round.
        if (newColumn.allowNull && !oldColumn.allowNull) {
            modifiedColumn.allowNull = true;
            nullableColumns.push(columnName);
            modified = true;
        }
        if (oldColumn.unique && !newColumn.unique) {
            noLongerUnique.push(columnName);
        }
        if (!modifiedColumn.type) {
            throw new Error(`Missing type for column ${columnName}`);
        }
        if (modifiedColumn.length === 0) {
            delete modifiedColumn.length;
        }
        if (!dialectConfig?.decimals.includes(modifiedColumn.type)) {
            delete modifiedColumn.decimal;
        }
        // A grown length is a modification on its own.
        if (modifiedColumn.length && oldColumn.length && modifiedColumn.length > oldColumn.length) {
            modified = true;
        }
        if (modified) {
            modifyColumns[columnName] = modifiedColumn;
        }
    }

    // Primary keys; renamed columns were already removed from newHeaders above.
    const oldPrimaryKeys = Object.keys(oldHeaders).filter(name => oldHeaders[name].primary);
    const newPrimaryKeys = Object.keys(newHeaders).filter(name => newHeaders[name].primary);
    const structuralPrimaryKeyChanges = newPrimaryKeys.filter(pk => !oldPrimaryKeys.includes(pk));
    if (structuralPrimaryKeyChanges.length > 0 || renamedPrimaryKeys.length > 0) {
        primaryKeyChanges = [...new Set([...oldPrimaryKeys, ...newPrimaryKeys])];
        for (const { oldName, newName } of renamedPrimaryKeys) {
            if (primaryKeyChanges.includes(oldName)) {
                primaryKeyChanges.push(newName);
            }
        }
        // The final key list must only contain the new names of renamed keys.
        for (const { oldName } of renamedPrimaryKeys) {
            primaryKeyChanges = primaryKeyChanges.filter(pk => pk !== oldName);
        }
    }

    // Assemble the merged metadata: old + added, then modifications, drops and renames.
    const updatedMetaData = { ...oldHeaders, ...addColumns };
    for (const col in modifyColumns) {
        updatedMetaData[col] = modifyColumns[col];
    }
    for (const col of dropColumns) {
        delete updatedMetaData[col];
    }
    for (const { oldName, newName } of renameColumns) {
        updatedMetaData[newName] = updatedMetaData[oldName];
        delete updatedMetaData[oldName];
    }
    return {
        changes: {
            addColumns,
            modifyColumns,
            dropColumns,
            renameColumns,
            nullableColumns,
            noLongerUnique,
            primaryKeyChanges,
        },
        updatedMetaData
    };
}