/*
 * autosql
 * Version:
 * An auto-parser of JSON into SQL.
 * 290 lines (289 loc) • 12.4 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.predictType = predictType;
exports.collateTypes = collateTypes;
exports.updateColumnType = updateColumnType;
const regex_1 = require("../config/regex");
const groupings_1 = require("../config/groupings");
const utilities_1 = require("./utilities");
/**
 * Predicts a SQL-style column type for a single data point by testing its
 * string form against the patterns in `regex_1.regexPatterns`.
 *
 * Objects are serialised with `JSON.stringify`, strings are used as-is and
 * every other value goes through `String()`. Patterns are tried in a fixed
 * order (boolean, binary, number, decimal, exponential, datetimetz, datetime,
 * date, time); a value that matches none of them is "json" when it parses as
 * JSON and "varchar" otherwise. Integers are then narrowed by range and
 * varchar values are widened by length.
 *
 * @param {*} data - The value to classify.
 * @returns {string|null} The predicted type name, or `null` when `data` is
 *   `null`, `undefined` or an invalid `Date` (no type information).
 * @throws {Error} Any failure is rethrown as `Error in predictType: …` with
 *   the original error attached as `cause`.
 */
function predictType(data) {
    try {
        if (data === undefined || data === null) {
            return null;
        }
        // JSON.stringify turns an invalid Date into the text "null", which would
        // otherwise be classified as "json". An unparseable date carries no
        // type information, so treat it like a missing value.
        if (data instanceof Date && Number.isNaN(data.getTime())) {
            return null;
        }
        let strData;
        if (typeof data === "object") {
            strData = JSON.stringify(data);
        }
        else if (typeof data === "string") {
            strData = data;
        }
        else {
            strData = String(data); // For non-objects, just convert to string
        }
        let json = false;
        try {
            JSON.parse(strData);
            json = true;
        }
        catch {
            // Not JSON - expected for most inputs, so deliberately ignored.
        }
        // Detect and normalize numbers; a falsy result from normalizeNumber is
        // treated as an invalid numeric format.
        if (regex_1.regexPatterns.number.test(strData) || regex_1.regexPatterns.decimal.test(strData)) {
            strData = (0, utilities_1.normalizeNumber)(strData);
            if (!strData) {
                return "varchar";
            }
        }
        let currentType = matchPatternType(strData, json);
        if (currentType === "int") {
            currentType = narrowIntegerType(strData);
        }
        if (currentType === "json") {
            if (strData.length > 4294967295) {
                throw new Error("data_too_long: Data is too long for JSON field");
            }
            return "json"; // Always keep JSON if detected
        }
        if (currentType === "varchar") {
            return sizeTextType(strData.length);
        }
        // Only reachable if one of the date patterns accepts the literal text
        // "Invalid Date"; kept for compatibility with the previous behaviour.
        if (["datetime", "date", "time"].includes(currentType) && strData === "Invalid Date") {
            return null;
        }
        return currentType;
    }
    catch (error) {
        // Same message as before; `cause` keeps the original error and its stack.
        throw new Error(`Error in predictType: ${error}`, { cause: error });
    }
}
/**
 * Returns the type of the first pattern that matches, in priority order,
 * falling back to "json" (when the text parsed as JSON) or "varchar".
 */
function matchPatternType(strData, isJson) {
    const patterns = regex_1.regexPatterns;
    if (patterns.boolean.test(strData)) return "boolean";
    if (patterns.binary.test(strData)) return "binary";
    if (patterns.number.test(strData)) return "int";
    if (patterns.decimal.test(strData)) return "decimal";
    if (patterns.exponential.test(strData)) return "exponential";
    if (patterns.datetimetz.test(strData)) return "datetimetz";
    if (patterns.datetime.test(strData)) return "datetime";
    if (patterns.date.test(strData)) return "date";
    if (patterns.time.test(strData)) return "time";
    return isJson ? "json" : "varchar";
}
/**
 * Narrows text that matched the integer pattern to tinyint / smallint / int /
 * bigint by range; text containing a '.' is "decimal" and values outside the
 * signed 64-bit range are "varchar".
 */
function narrowIntegerType(strData) {
    if (strData.includes(".")) {
        return "decimal";
    }
    const numValue = Number(strData);
    if (Number.isSafeInteger(numValue)) {
        if (numValue >= -128 && numValue <= 127) {
            return "tinyint";
        }
        if (numValue >= -32768 && numValue <= 32767) {
            return "smallint";
        }
        if (numValue >= -2147483648 && numValue <= 2147483647) {
            return "int";
        }
        return "bigint";
    }
    // Beyond the safe-integer range Number loses precision, so compare as BigInt.
    try {
        const bigIntValue = BigInt(strData);
        const fitsInt64 = bigIntValue <= 9223372036854775807n && bigIntValue >= -9223372036854775808n;
        return fitsInt64 ? "bigint" : "varchar";
    }
    catch {
        return "varchar"; // If conversion fails, it's not a valid number
    }
}
/**
 * Picks the text type for a string of the given length. Thresholds are kept
 * exactly as the previous implementation had them.
 */
function sizeTextType(length) {
    if (length < 6553) {
        return "varchar";
    }
    if (length < 65535) {
        return "text";
    }
    if (length < 16777215) {
        return "mediumtext";
    }
    if (length < 4294967295) {
        return "longtext";
    }
    throw new Error("data_too_long: Data is too long for longtext field");
}
/**
 * Collapses a collection of predicted types into the single type able to hold
 * all of them.
 *
 * `null` and `undefined` entries are ignored. Types are folded pairwise in
 * iteration order using the groups in `groupings_1.groupings`; within the
 * int, specialInt and text groups the entry that appears later in the group
 * list wins.
 *
 * @param {Set<string|null>|Array<string|null>} typeSetOrArray - Types to merge.
 * @returns {string} The collated type. "binary" when there is nothing to
 *   collate (missing/empty input or only null entries); "varchar" when two
 *   types from different groups have no dedicated rule.
 */
function collateTypes(typeSetOrArray) {
    if (!typeSetOrArray) {
        return "binary";
    }
    const isSet = typeSetOrArray instanceof Set;
    if ((isSet ? typeSetOrArray.size : typeSetOrArray.length) === 0) {
        return "binary";
    }
    const candidates = isSet ? (0, utilities_1.setToArray)(typeSetOrArray) : typeSetOrArray;
    // `!= null` drops undefined as well as null: an unset type must not drag
    // the result down to "varchar".
    const uniqueTypes = [...new Set(candidates.filter((t) => t != null))];
    if (uniqueTypes.length === 0) {
        return "binary"; // Default fallback if all inputs were null
    }
    if (uniqueTypes.length === 1) {
        return uniqueTypes[0];
    }
    let overallType = null;
    for (const currentType of uniqueTypes) {
        if (!overallType) {
            overallType = currentType;
            continue;
        }
        if (currentType === overallType) {
            continue;
        }
        overallType = collatePair(overallType, currentType);
    }
    return overallType || "varchar";
}
/** Returns the name of the grouping that contains `type`, or null if none does. */
function typeGroupOf(type) {
    if (!type) {
        return null;
    }
    const groups = groupings_1.groupings;
    if (groups.intGroup.includes(type)) return "int";
    if (groups.specialIntGroup.includes(type)) return "specialInt";
    if (groups.textGroup.includes(type)) return "text";
    if (groups.specialTextGroup.includes(type)) return "specialText";
    if (groups.dateGroup.includes(type)) return "date";
    return null;
}
/** Returns whichever of `a` / `b` appears last in `orderedGroup`, or null if neither is present. */
function widestInGroup(orderedGroup, a, b) {
    for (let i = orderedGroup.length - 1; i >= 0; i--) {
        if (orderedGroup[i] === a || orderedGroup[i] === b) {
            return orderedGroup[i];
        }
    }
    return null;
}
/** Merges one further type into the running result and returns the new result. */
function collatePair(overallType, currentType) {
    const isPair = (a, b) => (currentType === a && overallType === b) || (currentType === b && overallType === a);
    if (isPair("boolean", "binary")) {
        return "binary";
    }
    if (isPair("decimal", "exponential")) {
        return "exponential";
    }
    if (isPair("datetimetz", "datetime")) {
        return "datetimetz";
    }
    const groups = groupings_1.groupings;
    const currentTypeGroup = typeGroupOf(currentType);
    const overallTypeGroup = typeGroupOf(overallType);
    if (currentTypeGroup !== overallTypeGroup) {
        // True when `type` is being merged with any member of the int group.
        const meetsInt = (type) => (currentType === type && overallTypeGroup === "int") || (overallType === type && currentTypeGroup === "int");
        if (meetsInt("exponential")) {
            return "exponential";
        }
        if (meetsInt("double")) {
            return "double";
        }
        if (meetsInt("decimal")) {
            return "decimal";
        }
        if (overallTypeGroup === "text" || currentTypeGroup === "text") {
            return widestInGroup(groups.textGroup, currentType, overallType) || "varchar";
        }
        return "varchar";
    }
    switch (overallTypeGroup) {
        case "specialInt":
            return widestInGroup(groups.specialIntGroup, currentType, overallType) ?? overallType;
        case "int":
            return widestInGroup(groups.intGroup, currentType, overallType) ?? overallType;
        case "text":
            return widestInGroup(groups.textGroup, currentType, overallType) ?? overallType;
        case "date":
            // Keep the timezone-aware type once it has been seen; otherwise the
            // result would depend on the order the types arrive in.
            if (overallType === "datetimetz" || currentType === "datetimetz") {
                return "datetimetz";
            }
            return "datetime"; // Dates and times should be stored as datetime
        default:
            // Same (or no) group without a dedicated rule: keep the running type.
            return overallType;
    }
}
/**
 * Updates a column descriptor in place from one more observed data point.
 *
 * A data point with no predictable type marks the column as nullable and
 * leaves its type untouched. Otherwise the column adopts the detected type
 * when it has none yet, or the collation of its current type and the
 * detected one when they differ.
 *
 * @param {{type?: (string|null), allowNull?: boolean}} column - Mutated in place.
 * @param {*} dataPoint - The value to classify with `predictType`.
 * @returns {Promise<void>} Resolves once `column` has been updated; the
 *   mutation itself happens synchronously during the call.
 */
async function updateColumnType(column, dataPoint) {
    const detectedType = predictType(dataPoint);
    if (!detectedType) {
        column.allowNull = true;
        return;
    }
    // A column with no type yet simply adopts the detected one; collating
    // against a missing type could otherwise widen it to "varchar".
    if (column.type === undefined || column.type === null) {
        column.type = detectedType;
        return;
    }
    if (detectedType !== column.type) {
        column.type = collateTypes([detectedType, column.type]);
    }
}