autosql
An auto-parser of JSON into SQL.
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.initializeMetaData = initializeMetaData;
exports.getDataHeaders = getDataHeaders;
exports.getMetaData = getMetaData;
exports.compareMetaData = compareMetaData;
const utilities_1 = require("./utilities");
const groupings_1 = require("../config/groupings");
const columnTypes_1 = require("./columnTypes");
const columnTypes_2 = require("./columnTypes");
const defaults_1 = require("../config/defaults");
const keys_1 = require("./keys");
const database_1 = require("../db/database");
const mysqlConfig_1 = require("../db/config/mysqlConfig");
const pgsqlConfig_1 = require("../db/config/pgsqlConfig");
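/**
 * Build an empty metadata entry for every header: type null, zero length and
 * decimal precision, and all constraint flags (unique, index, primary, etc.) reset.
 */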
function initializeMetaData(headers) {
try {
return headers.map(header => ({
[header]: {
type: null,
length: 0,
allowNull: false,
unique: false,
index: false,
pseudounique: false,
primary: false,
autoIncrement: false,
default: undefined,
decimal: 0
}
}));
}
catch (error) {
throw new Error(`Error in initializeMetaData: ${error}`);
}
}
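/**
 * Infer per-column metadata (type, length, decimal precision, nullability, and
 * uniqueness/categorical flags) from an array of row objects. When both
 * `sampling` and `samplingMinimum` are configured and the data set is large
 * enough, types are predicted from a random sample and only length/decimal
 * estimates are widened against the remaining rows.
 */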
async function getDataHeaders(data, databaseConfig) {
const sampling = databaseConfig.sampling;
const samplingMinimum = databaseConfig.samplingMinimum;
let metaData = {};
const allColumns = new Set();
let metaDataInterim = {};
if ((sampling !== undefined || samplingMinimum !== undefined) && (sampling === undefined || samplingMinimum === undefined)) {
throw new Error("Both sampling percentage and sampling minimum must be provided together.");
}
const dialect = databaseConfig.sqlDialect;
const db = database_1.Database.create({ sqlDialect: dialect });
const dialectConfig = db.getDialectConfig();
let sampleData = data;
let remainingData = [];
if (sampling !== undefined && sampling > 0 && samplingMinimum !== undefined && data.length > samplingMinimum) {
let sampleSize = Math.round(data.length * sampling);
sampleSize = Math.max(sampleSize, samplingMinimum); // Ensure minimum sample size
const shuffledData = (0, utilities_1.shuffleArray)(data);
sampleData = shuffledData.slice(0, sampleSize); // Shuffle and take sample
remainingData = shuffledData.slice(sampleSize); // Store remaining data
}
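// First pass: scan the (possibly sampled) rows and accumulate per-column interim
// statistics: distinct values, null counts, candidate types, and maximum length/decimal digits.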
for (const row of sampleData) {
const rowColumns = Object.keys(row);
rowColumns.forEach(column => allColumns.add(column));
for (const column of allColumns) {
const value = row[column];
if (metaData[column] == undefined) {
metaData[column] = {
type: null,
length: 0,
allowNull: false,
unique: false,
index: false,
pseudounique: false,
primary: false,
autoIncrement: false,
default: undefined,
decimal: 0
};
}
if (metaDataInterim[column] == undefined) {
metaDataInterim[column] = {
uniqueSet: new Set(),
valueCount: 0,
nullCount: 0,
types: new Set(),
length: 0,
decimal: 0,
trueMaxDecimal: 0
};
}
if (value === '' || value === null || value === undefined || value === '\\N' || value === 'null') {
metaData[column].allowNull = true;
metaDataInterim[column].nullCount++;
continue;
}
const type = (0, columnTypes_2.predictType)(value);
if (!type)
continue;
const sqlizedValue = (0, utilities_1.sqlize)(value, type, dialectConfig, databaseConfig);
metaDataInterim[column].valueCount++;
metaDataInterim[column].uniqueSet.add(sqlizedValue);
metaDataInterim[column].types.add(type);
if (groupings_1.groupings.intGroup.includes(type) || groupings_1.groupings.specialIntGroup.includes(type)) {
let valueStr = (0, utilities_1.normalizeNumber)(value);
if (!valueStr) {
valueStr = String(value).trim();
}
const decimalLen = valueStr.includes(".") ? valueStr.split(".")[1].length : 0;
const integerLen = valueStr.split(".")[0].length;
metaDataInterim[column].decimal = Math.max(metaDataInterim[column].decimal, decimalLen);
metaDataInterim[column].trueMaxDecimal = Math.max(metaDataInterim[column].trueMaxDecimal, metaDataInterim[column].decimal, decimalLen);
metaDataInterim[column].decimal = Math.min(metaDataInterim[column].decimal, databaseConfig.decimalMaxLength || 10);
metaDataInterim[column].length = Math.max(metaDataInterim[column].length, integerLen + metaDataInterim[column].decimal);
}
else {
metaDataInterim[column].length = Math.max(metaDataInterim[column].length, String(value).length);
}
}
}
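// Second pass: collate each column's candidate types and derive unique, pseudounique,
// categorical, and single-value flags from the sampled statistics.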
for (const column in metaDataInterim) {
const type = (0, columnTypes_1.collateTypes)(metaDataInterim[column].types);
metaDataInterim[column].collated_type = type;
metaData[column].type = type;
metaData[column].length = metaDataInterim[column].length || 0;
metaData[column].decimal = metaDataInterim[column].decimal || 0;
const uniquePercentage = metaDataInterim[column].uniqueSet.size / metaDataInterim[column].valueCount;
if (uniquePercentage == 1 && metaDataInterim[column].uniqueSet.size > 0) {
metaData[column].unique = true;
}
else if (uniquePercentage >= (databaseConfig.pseudoUnique || defaults_1.defaults.pseudoUnique) && metaDataInterim[column].uniqueSet.size > 0) {
metaData[column].pseudounique = true;
}
else if (uniquePercentage <= (databaseConfig.categorical || defaults_1.defaults.categorical) && metaDataInterim[column].uniqueSet.size > 0 && !defaults_1.nonCategoricalTypes.includes(type)) {
metaData[column].categorical = true;
}
else if (metaDataInterim[column].uniqueSet.size == 1 && metaDataInterim[column].nullCount == 0 && metaDataInterim[column].valueCount > 0) {
metaData[column].singleValue = true;
}
if (metaDataInterim[column].nullCount !== 0) {
metaData[column].allowNull = true;
}
if (metaData[column].length > (databaseConfig.maxKeyLength || defaults_1.defaults.maxKeyLength) && metaData[column].unique) {
metaData[column].unique = false;
}
if (metaData[column].type === 'varchar' && metaData[column].length > (databaseConfig.maxVarcharLength || defaults_1.defaults.maxVarcharLength)) {
metaData[column].type = 'text';
}
}
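// Third pass: widen length and decimal estimates using the rows excluded from the sample.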
for (const row of remainingData) {
for (const column of allColumns) {
const value = row[column];
const type = metaData[column].type;
if (!type)
continue;
if (groupings_1.groupings.intGroup.includes(type) || groupings_1.groupings.specialIntGroup.includes(type)) {
let valueStr = (0, utilities_1.normalizeNumber)(value);
if (!valueStr) {
valueStr = String(value).trim();
}
const decimalLen = valueStr.includes(".") ? valueStr.split(".")[1].length : 0;
const integerLen = valueStr.split(".")[0].length;
metaDataInterim[column].decimal = Math.max(metaDataInterim[column].decimal, decimalLen);
metaDataInterim[column].trueMaxDecimal = Math.max(metaDataInterim[column].trueMaxDecimal, metaDataInterim[column].decimal, decimalLen);
metaDataInterim[column].decimal = Math.min(metaDataInterim[column].decimal, databaseConfig.decimalMaxLength || 10);
metaDataInterim[column].length = Math.max(metaDataInterim[column].length, integerLen + metaDataInterim[column].decimal);
}
else {
metaDataInterim[column].length = Math.max(metaDataInterim[column].length, String(value).length);
}
}
}
for (const column in metaDataInterim) {
// If the collated type does not support decimals but decimal digits were recorded, fold them
// back into the overall length (plus 1 for the decimal point) and reset decimal to 0.
// Use trueMaxDecimal rather than the capped decimal so that digits truncated by decimalMaxLength
// are still counted when the value is stored as a non-decimal type. The adjustment is made on
// metaDataInterim[column] so that later steps reuse the corrected values.
if (!dialectConfig.decimals.includes(metaDataInterim[column].collated_type || 'varchar')) {
metaDataInterim[column].length = metaDataInterim[column].length + (metaDataInterim[column].decimal > 0 ? 1 : 0) - metaDataInterim[column].decimal + metaDataInterim[column].trueMaxDecimal;
metaDataInterim[column].decimal = 0;
}
metaData[column].length = metaDataInterim[column].length || 0;
metaData[column].decimal = metaDataInterim[column].decimal || 0;
}
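// Optionally drop columns whose sampled values were all blank or null.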
const excludeBlankColumns = databaseConfig.excludeBlankColumns;
if (excludeBlankColumns) {
const emptyOrNullKeys = Object.entries(metaDataInterim)
.filter(([_, meta]) => meta.uniqueSet.size === 0 &&
meta.valueCount === 0 &&
meta.nullCount > 0)
.map(([key]) => key);
for (const key of emptyOrNullKeys) {
delete metaData[key];
}
}
return metaData;
}
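/**
 * Resolve the dialect configuration from either an existing Database instance or a
 * raw config object, derive column metadata via getDataHeaders, and, when
 * autoIndexing is enabled, run predictIndexes to suggest primary/unique/index keys.
 */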
async function getMetaData(databaseOrConfig, data, primaryKey) {
try {
let validatedConfig;
let dbInstance;
let dialectConfig;
// Determine if input is a Database instance or a config object
if (databaseOrConfig instanceof database_1.Database) {
dbInstance = databaseOrConfig;
validatedConfig = (0, utilities_1.validateConfig)(dbInstance.getConfig()); // Use existing Database config
dialectConfig = dbInstance.getDialectConfig();
}
else {
validatedConfig = (0, utilities_1.validateConfig)(databaseOrConfig); // Use provided config
if (validatedConfig.sqlDialect == 'mysql') {
dialectConfig = mysqlConfig_1.mysqlConfig;
}
else if (validatedConfig.sqlDialect == 'pgsql') {
dialectConfig = pgsqlConfig_1.pgsqlConfig;
}
else {
throw new Error(`Unsupported SQL dialect: ${validatedConfig.sqlDialect}`);
}
}
const sqlDialect = validatedConfig.sqlDialect;
if (!sqlDialect) {
throw new Error(`Unsupported SQL dialect: ${sqlDialect}`);
}
const headers = await getDataHeaders(data, validatedConfig);
let metaData;
if (validatedConfig.autoIndexing) {
metaData = (0, keys_1.predictIndexes)(headers, validatedConfig.maxKeyLength, primaryKey || validatedConfig.primaryKey, data);
}
else {
metaData = headers;
}
return metaData;
}
catch (error) {
throw new Error(`Error in getMetaData: ${error}`);
}
}
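/**
 * Diff two metadata maps and return the schema changes needed to move from the old
 * definition to the new one (columns to add, modify, drop, or rename; columns that
 * became nullable or lost uniqueness; primary-key changes) along with the merged,
 * updated metadata.
 */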
function compareMetaData(oldHeadersOriginal, newHeadersOriginal, dialectConfig) {
if (!oldHeadersOriginal) {
return {
changes: {
addColumns: {},
modifyColumns: {},
dropColumns: [],
renameColumns: [],
nullableColumns: [],
noLongerUnique: [],
primaryKeyChanges: [],
},
updatedMetaData: newHeadersOriginal
};
}
const newHeaders = JSON.parse(JSON.stringify(newHeadersOriginal));
const oldHeaders = JSON.parse(JSON.stringify(oldHeadersOriginal));
const addColumns = {};
const modifyColumns = {};
const dropColumns = [];
const renameColumns = [];
const nullableColumns = [];
const noLongerUnique = [];
let oldPrimaryKeys = [];
let newPrimaryKeys = [];
let primaryKeyChanges = [];
let renamedPrimaryKeys = [];
// ✅ Identify removed columns
for (const oldColumnName of Object.keys(oldHeaders)) {
if (!newHeaders.hasOwnProperty(oldColumnName)) {
dropColumns.push(oldColumnName);
}
}
// ✅ Identify renamed columns
for (const oldColumnName of Object.keys(oldHeaders)) {
for (const newColumnName of Object.keys(newHeaders)) {
const oldColumn = oldHeaders[oldColumnName];
const newColumn = newHeaders[newColumnName];
if (oldColumnName !== newColumnName &&
!(oldColumnName in newHeaders) &&
!(newColumnName in oldHeaders) &&
JSON.stringify(oldColumn) === JSON.stringify(newColumn)) {
renameColumns.push({ oldName: oldColumnName, newName: newColumnName });
if (oldColumn.primary && newColumn.primary) {
renamedPrimaryKeys.push({ oldName: oldColumnName, newName: newColumnName });
}
// Guard the splice: indexOf can return -1 here (e.g. two new columns matching the same old column), which would otherwise remove the wrong entry
const dropIndex = dropColumns.indexOf(oldColumnName);
if (dropIndex !== -1)
dropColumns.splice(dropIndex, 1);
delete newHeaders[newColumnName];
}
}
}
// ✅ Identify added & modified columns
for (const [columnName, newColumn] of Object.entries(newHeaders)) {
if (!oldHeaders.hasOwnProperty(columnName)) {
// New column - needs to be added
addColumns[columnName] = newColumn;
}
else {
const oldColumn = oldHeaders[columnName];
let modified = false;
let modifiedColumn = { ...oldColumn };
const oldType = oldColumn.type ?? "varchar";
const newType = newColumn.type ?? "varchar";
// ✅ Use `collateTypes()` to determine the best compatible type
const recommendedType = (0, columnTypes_1.collateTypes)([oldType, newType]);
if (recommendedType !== oldType) {
console.warn(`🔄 Converting ${columnName}: ${oldType} → ${recommendedType}`);
modifiedColumn.type = recommendedType;
modifiedColumn.previousType = oldType;
modified = true;
}
else {
modifiedColumn.type = recommendedType;
modifiedColumn.previousType = oldType;
}
// ✅ Merge column lengths safely
const oldLength = oldColumn.length ?? 0;
const newLength = newColumn.length ?? 0;
const oldDecimal = oldColumn.decimal ?? 0;
const newDecimal = newColumn.decimal ?? 0;
// ✅ Remove `length` if the new type is in `no_length`
if (dialectConfig?.noLength.includes(modifiedColumn.type || newColumn.type || oldColumn.type || "varchar")) {
delete modifiedColumn.length;
delete modifiedColumn.decimal;
}
else {
if (dialectConfig?.decimals.includes(modifiedColumn.type || newColumn.type || oldColumn.type || "varchar")) {
// ✅ If type supports decimals, merge decimal values correctly
const oldPreDecimal = oldLength - oldDecimal;
const newPreDecimal = newLength - newDecimal;
const maxPreDecimal = Math.max(oldPreDecimal, newPreDecimal);
const maxDecimal = Math.max(oldDecimal, newDecimal);
modifiedColumn.length = maxPreDecimal + maxDecimal;
modifiedColumn.decimal = maxDecimal;
}
else {
// ✅ If type does not support decimals, just merge length
modifiedColumn.length = Math.max(oldLength, newLength);
delete modifiedColumn.decimal;
}
}
// ✅ Allow `NOT NULL` to `NULL`, but not vice versa
if (newColumn.allowNull && !oldColumn.allowNull) {
modifiedColumn.allowNull = true;
nullableColumns.push(columnName);
modified = true;
}
// ✅ Remove unique constraint if it's no longer unique
if (oldColumn.unique && !newColumn.unique) {
noLongerUnique.push(columnName);
}
// ✅ Ensure a type is set
if (!modifiedColumn.type) {
throw new Error(`Missing type for column ${columnName}`);
}
// ✅ Remove `length` if it's 0 and not required
if (modifiedColumn.length === 0) {
delete modifiedColumn.length;
}
// ✅ Ensure decimals only exist where applicable
if (!dialectConfig?.decimals.includes(modifiedColumn.type)) {
delete modifiedColumn.decimal;
}
// ✅ Flag the column as modified only if the merged length exceeds the previous length
if (modifiedColumn.length && oldColumn.length && modifiedColumn.length > oldColumn.length) {
modified = true;
}
if (modified) {
modifyColumns[columnName] = modifiedColumn;
}
}
}
for (const columnName of Object.keys(oldHeaders)) {
if (oldHeaders[columnName].primary) {
oldPrimaryKeys.push(columnName);
}
}
for (const columnName of Object.keys(newHeaders)) {
if (newHeaders[columnName].primary) {
newPrimaryKeys.push(columnName);
}
}
// ✅ Identify true primary key changes (excluding length-only modifications)
const structuralPrimaryKeyChanges = newPrimaryKeys.filter(pk => !oldPrimaryKeys.includes(pk));
// ✅ Only update primaryKeyChanges if there's an actual key change
if (structuralPrimaryKeyChanges.length > 0 || renamedPrimaryKeys.length > 0) {
primaryKeyChanges = [...new Set([...oldPrimaryKeys, ...newPrimaryKeys])];
for (const { oldName, newName } of renamedPrimaryKeys) {
if (primaryKeyChanges.includes(oldName)) {
primaryKeyChanges.push(newName); // ✅ Add new key
}
}
// ✅ Remove old names of renamed primary keys from the final key list
for (const { oldName } of renamedPrimaryKeys) {
primaryKeyChanges = primaryKeyChanges.filter(pk => pk !== oldName);
}
}
const updatedMetaData = {
...oldHeaders,
...addColumns
};
// ✅ Apply modifications
for (const col in modifyColumns) {
updatedMetaData[col] = modifyColumns[col];
}
// ✅ Remove dropped columns
for (const col of dropColumns) {
delete updatedMetaData[col];
}
// ✅ Apply renames
for (const { oldName, newName } of renameColumns) {
updatedMetaData[newName] = updatedMetaData[oldName];
delete updatedMetaData[oldName];
}
return {
changes: {
addColumns,
modifyColumns,
dropColumns,
renameColumns,
nullableColumns,
noLongerUnique,
primaryKeyChanges,
},
updatedMetaData
};
}
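// Minimal usage sketch (illustrative only): the exact config fields required by
// validateConfig are an assumption here, and `previousMeta` stands in for a
// hypothetical, previously stored metadata object.
//
//   const rows = [{ id: 1, name: 'alice' }, { id: 2, name: 'bob' }];
//   const meta = await getMetaData({ sqlDialect: 'mysql' }, rows);
//   const diff = compareMetaData(previousMeta, meta, mysqlConfig_1.mysqlConfig);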