@dbml/connector
Version:
This package was created to fetch the schema JSON from many kinds of databases.
655 lines (590 loc) • 20.1 kB
text/typescript
/* eslint-disable camelcase */
import sql from 'mssql';
import { buildSchemaQuery, parseConnectionString } from '../utils/parseSchema';
import {
DatabaseSchema,
DefaultInfo,
DefaultType,
Enum,
Field,
FieldsDictionary,
IndexesDictionary,
Ref,
RefEndpoint,
Table,
TableConstraintsDictionary,
EnumValuesDict,
} from './types';
// Date/time type names that are emitted verbatim (they never carry a
// length/precision suffix in the generated DBML).
// https://learn.microsoft.com/en-us/sql/t-sql/data-types/date-and-time-types?view=sql-server-ver15
const MSSQL_DATE_TYPES: readonly string[] = [
  'date',
  'time',
  'datetime',
  'datetime2',
  'datetimeoffset',
  'smalldatetime',
];
/**
 * Opens an mssql connection pool and validates it with a lightweight probe
 * query before handing it to the caller.
 *
 * Fix: `sql.connect` previously ran outside the try block, so connect-time
 * failures escaped both the cleanup path and the `SQL connection error:`
 * wrapping. Connecting inside the try makes every failure mode consistent.
 *
 * @param connection - mssql/ODBC connection string
 * @returns a connected, validated ConnectionPool
 * @throws Error (`SQL connection error: ...`) when connecting or probing fails
 */
const getValidatedClient = async (connection: string): Promise<sql.ConnectionPool> => {
  let pool: sql.ConnectionPool | undefined;
  try {
    // Establish the connection pool
    pool = await sql.connect(connection);
    // Validate the connection with a trivial query; some failures only
    // surface on first use rather than at connect time.
    await pool.request().query('SELECT 1');
    // If successful, return the pool
    return pool;
  } catch (err) {
    // Ensure any pool that was opened before the failure is closed,
    // so we do not leak open connections.
    if (pool?.connected) {
      await pool.close();
    }
    if (err instanceof Error) {
      throw new Error(`SQL connection error: ${err.message}`);
    }
    throw err; // Rethrow non-Error values for the caller to handle
  }
};
// Map SQL Server's 'YES'/'NO' metadata strings onto a boolean.
const convertQueryBoolean = (val: string | null): boolean => {
  // Anything other than the literal 'YES' (including null) counts as false.
  return val === 'YES';
};
/**
 * Renders a column's full SQL type name (with length/precision/scale suffix
 * where applicable) from the raw catalog metadata.
 *
 * @param data_type - base type name from sys.types
 * @param character_maximum_length - max length in bytes (-1 means MAX)
 * @param numeric_precision - precision for numeric types (0 otherwise)
 * @param numeric_scale - scale for numeric types (0 otherwise)
 * @returns the formatted type name, e.g. `nvarchar(50)` or `decimal(10,2)`
 */
const getFieldType = (data_type: string, character_maximum_length: number, numeric_precision: number, numeric_scale: number): string => {
  // Types emitted verbatim:
  // - date/time family (see MSSQL_DATE_TYPES),
  // - timestamp, a synonym of rowversion whose precision cannot be specified
  //   https://learn.microsoft.com/en-us/sql/t-sql/data-types/rowversion-transact-sql?view=sql-server-ver15
  // - bit.
  if (MSSQL_DATE_TYPES.includes(data_type) || data_type === 'timestamp' || data_type === 'bit') {
    return data_type;
  }
  // A non-zero precision marks a numeric-based column.
  if (numeric_precision) {
    const suffix = numeric_scale
      ? `${numeric_precision},${numeric_scale}`
      : `${numeric_precision}`;
    return `${data_type}(${suffix})`;
  }
  // Legacy LOB string types never carry a length suffix.
  if (['ntext', 'text', 'image'].includes(data_type)) {
    return data_type;
  }
  // A negative length means varchar(max), nvarchar(max), varbinary(max), or xml.
  if (character_maximum_length < 0) {
    return data_type === 'xml' ? data_type : `${data_type}(MAX)`;
  }
  if (character_maximum_length > 0) {
    // character_maximum_length is measured in bytes. nchar and nvarchar store
    // Unicode characters at 2 bytes each, so halve it to get the length in
    // characters.
    // ref: https://learn.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver15
    const isWideCharType = data_type === 'nchar' || data_type === 'nvarchar';
    const lengthInCharacters = isWideCharType
      ? character_maximum_length / 2
      : character_maximum_length;
    return `${data_type}(${lengthInCharacters})`;
  }
  return data_type;
};
/**
 * Parses a raw column default expression from sys.default_constraints into a
 * typed default descriptor.
 *
 * @param data_type - base type of the column (currently unused here)
 * @param column_default - raw default text, e.g. `((1))`, `(('hello'))`, `(getdate())`
 * @param default_type - classification computed by the metadata query
 * @returns the default's type plus its display value
 */
const getDbdefault = (data_type: string, column_default: string, default_type: DefaultType): DefaultInfo => {
  // Strip the outermost parentheses, then unwrap each remaining single-level
  // parenthesised group: (1) => 1, ('hello') => 'hello', getdate()-(1) => getdate()-1.
  // [^()]+ matches any run of characters that contains no parentheses.
  const inner = column_default.slice(1, -1);
  const value = inner.replace(/\(([^()]+)\)/g, '$1');
  return {
    type: default_type,
    // String defaults still carry their surrounding quotes at this point; drop them.
    value: default_type === 'string' ? value.slice(1, -1) : value,
  };
};
/**
 * Extracts enum-like value sets from a CHECK constraint definition.
 *
 * Only OR-chains of the form [column]='value' are treated as enums, e.g.
 *   ([status]='cancelled' OR [status]='shipped' OR [status]='pending')
 * Range checks ([quantity]>(0)), column comparisons ([a]<=[b]) and LIKE
 * patterns yield no enum values and produce an empty result.
 *
 * Columns that share an identical value set (order-insensitive) within the
 * same constraint are merged into a single entry; the entry's name is the
 * constraint name suffixed with the merged column names.
 *
 * @param definition - raw CHECK constraint definition text
 * @param constraint_name - name of the CHECK constraint
 * @returns one EnumValuesDict per distinct value set (see types.ts)
 */
const getEnumValues = (definition: string, constraint_name: string): EnumValuesDict[] => {
  if (!definition) return [];
  // Collect every [column]='value' pair in order of appearance.
  const pairs = [...definition.matchAll(/\[([^\]]+)\]='([^']+)'/g)];
  if (pairs.length === 0) return [];

  // column name -> values in appearance order
  const valuesByColumn: { [key: string]: string[] } = {};
  for (const match of pairs) {
    const column = match[1];
    const enumValue = match[2];
    if (!valuesByColumn[column]) valuesByColumn[column] = [];
    valuesByColumn[column].push(enumValue);
  }

  // True when both lists hold the same multiset of values.
  const sameValueSet = (a: string[], b: string[]): boolean => {
    if (a.length !== b.length) return false;
    const sortedA = [...a].sort();
    const sortedB = [...b].sort();
    return sortedA.every((value, index) => value === sortedB[index]);
  };

  const result: EnumValuesDict[] = [];
  const consumed = new Set<string>();
  for (const column of Object.keys(valuesByColumn)) {
    if (consumed.has(column)) continue;
    consumed.add(column);
    const columnValues = valuesByColumn[column];
    // Fold in every later column whose value set matches this one exactly.
    const mergedColumns = [column];
    for (const other of Object.keys(valuesByColumn)) {
      if (!consumed.has(other) && sameValueSet(columnValues, valuesByColumn[other])) {
        mergedColumns.push(other);
        consumed.add(other);
      }
    }
    result.push({
      columns: mergedColumns,
      enumValues: columnValues.map((name) => ({ name })),
      constraint_name: `${constraint_name}_${mergedColumns.join('_')}`,
    });
  }
  return result;
};
const generateField = (row: Record<string, any>): Field => {
const {
column_name,
data_type,
character_maximum_length,
numeric_precision,
numeric_scale,
identity_increment,
is_nullable,
column_default,
default_type,
column_comment,
} = row;
const dbdefault = column_default && default_type !== 'increment' ? getDbdefault(data_type, column_default, default_type) : null;
const fieldType = {
type_name: getFieldType(data_type, character_maximum_length, numeric_precision, numeric_scale),
schemaName: null,
};
return {
name: column_name,
type: fieldType,
dbdefault,
not_null: !convertQueryBoolean(is_nullable),
increment: !!identity_increment,
note: column_comment ? { value: column_comment } : { value: '' },
};
};
/**
 * Fetches table and column metadata (including table/column comments and
 * CHECK constraints) for the selected schemas, and converts the rows into
 * DBML tables, a fields dictionary keyed by "schema.table", and enums.
 *
 * Enum handling: an enum-like CHECK constraint (an OR-chain of [col]='value')
 * is turned into an Enum, and every column covered by it is re-typed to
 * reference that enum instead of its raw SQL type.
 *
 * @param client - validated mssql connection pool
 * @param schemas - schema names to filter by (empty list means no filter)
 * @returns tables, the per-table fields dictionary, and the discovered enums
 */
const generateTablesFieldsAndEnums = async (client: sql.ConnectionPool, schemas: string[]): Promise<{
  tables: Table[],
  fields: FieldsDictionary,
  enums: Enum[],
}> => {
  // constraint name -> parsed enum values; ensures each CHECK constraint is
  // parsed once and its enums registered only once, even when it spans
  // several result rows.
  const checkConstraintDefinitionDedup: { [key: string]: EnumValuesDict[] } = {};
  const fields: FieldsDictionary = {};
  const enums: Enum[] = [];
  // CTE chain:
  //   tables_and_fields           - one row per user-table column, with comments
  //   constraints_with_nulls      - left-joins candidate CHECK constraints per column
  //   constraints_with_row_number - ranks candidates so an enum-like definition
  //                                 ([col]='...') wins; rn = 1 keeps the best one
  const tablesAndFieldsSql = `
    WITH tables_and_fields AS (
      SELECT
        t.object_id AS table_id,
        s.name AS table_schema,
        t.name AS table_name,
        t.create_date as table_create_date,
        c.name AS column_name,
        c.column_id as column_id,
        ty.name AS data_type,
        c.max_length AS character_maximum_length,
        c.precision AS numeric_precision,
        c.scale AS numeric_scale,
        c.is_identity AS identity_increment,
        CASE
          WHEN c.is_nullable = 1 THEN 'YES'
          ELSE 'NO'
        END AS is_nullable,
        CASE
          WHEN c.default_object_id = 0 THEN NULL
          ELSE OBJECT_DEFINITION(c.default_object_id)
        END AS column_default,
        -- Fetching table comments
        p.value AS table_comment,
        ep.value AS column_comment
      FROM
        sys.tables t
        JOIN sys.schemas s ON t.schema_id = s.schema_id
        JOIN sys.columns c ON t.object_id = c.object_id
        JOIN sys.types ty ON c.user_type_id = ty.user_type_id
        LEFT JOIN sys.extended_properties p ON p.major_id = t.object_id
          AND p.name like '%description%'
          AND p.minor_id = 0 -- Ensure minor_id is 0 for table comments
        LEFT JOIN sys.extended_properties ep ON ep.major_id = c.object_id
          AND ep.minor_id = c.column_id
          AND ep.name like '%description%'
      WHERE
        t.type = 'U' -- User-defined tables
    ),
    constraints_with_nulls AS (
      SELECT
        tf.*,
        cc.name AS check_constraint_name,
        CASE
          WHEN cc.definition LIKE '%[' + tf.column_name + ']%=''%' THEN cc.definition
          ELSE NULL
        END AS check_constraint_definition
      FROM
        tables_and_fields AS tf
        LEFT JOIN sys.check_constraints cc
          ON cc.parent_object_id = OBJECT_ID(tf.table_schema + '.' + tf.table_name)
          AND cc.definition LIKE '%' + tf.column_name + '%' -- Ensure the constraint references the column
    ),
    constraints_with_row_number AS (
      SELECT
        *,
        ROW_NUMBER() OVER (
          PARTITION BY table_schema, table_name, column_name
          ORDER BY
            CASE
              WHEN check_constraint_definition IS NOT NULL THEN 1
              ELSE 2
            END,
            check_constraint_name
        ) AS rn
      FROM
        constraints_with_nulls
    )
    SELECT
      table_schema,
      table_name,
      column_name,
      data_type,
      character_maximum_length,
      numeric_precision,
      numeric_scale,
      identity_increment,
      is_nullable,
      column_default,
      table_comment,
      column_comment,
      check_constraint_name,
      check_constraint_definition,
      CASE
        WHEN column_default LIKE '((%))' THEN 'number'
        WHEN column_default LIKE '(''%'')' THEN 'string'
        ELSE 'expression'
      END AS default_type
    FROM
      constraints_with_row_number
    WHERE
      rn = 1
      ${buildSchemaQuery('table_schema', schemas)}
    ORDER BY
      table_schema,
      table_create_date,
      table_id,
      column_id;
  `;
  const tablesAndFieldsResult = await client.query(tablesAndFieldsSql);
  // Fold rows into a "schema.table" -> Table map; `fields` and `enums` are
  // accumulated as side effects of the same pass.
  const tables = tablesAndFieldsResult.recordset.reduce((acc, row) => {
    const {
      table_schema,
      table_name,
      table_comment,
      check_constraint_name,
      check_constraint_definition,
    } = row;
    const key = `${table_schema}.${table_name}`;
    if (!acc[key]) {
      acc[key] = {
        name: table_name,
        schemaName: table_schema,
        note: table_comment ? { value: table_comment } : { value: '' },
      };
    }
    if (!fields[key]) fields[key] = [];
    const field = generateField(row);
    // First time this constraint is seen: parse it and register its enums.
    if (check_constraint_definition && !checkConstraintDefinitionDedup[check_constraint_name]) {
      const enumValuesByColumns = getEnumValues(check_constraint_definition, check_constraint_name);
      enumValuesByColumns.forEach((item) => {
        enums.push({
          name: item.constraint_name,
          schemaName: table_schema,
          values: item.enumValues,
        });
      });
      checkConstraintDefinitionDedup[check_constraint_name] = enumValuesByColumns;
    }
    // If this column belongs to a parsed enum constraint, re-type it so it
    // references the generated enum instead of the raw SQL type.
    if (checkConstraintDefinitionDedup[check_constraint_name]) {
      const enumValuesByColumns = checkConstraintDefinitionDedup[check_constraint_name];
      const columnEnumValues = enumValuesByColumns.find((item) => item.columns.includes(field.name));
      if (columnEnumValues) {
        field.type = {
          type_name: columnEnumValues.constraint_name,
          schemaName: table_schema,
        };
      }
    }
    fields[key].push(field);
    return acc;
  }, {});
  return {
    tables: Object.values(tables),
    fields,
    enums,
  };
};
/**
 * Loads all foreign keys for the selected schemas and converts them to Ref
 * objects. Composite keys arrive as comma-joined column lists (built with
 * FOR XML PATH in the query) and are split back into arrays here.
 * Relationships whose endpoints match an already-collected ref are skipped.
 *
 * @param client - validated mssql connection pool
 * @param schemas - schema names to filter by (empty list means no filter)
 * @returns the deduplicated list of foreign-key refs
 */
const generateRefs = async (client: sql.ConnectionPool, schemas: string[]): Promise<Ref[]> => {
  const refsListSql = `
    SELECT
      s.name AS table_schema,
      t.name AS table_name,
      fk.name AS fk_constraint_name,
      STUFF((
        SELECT ',' + c1.name
        FROM sys.foreign_key_columns AS fkc
        JOIN sys.columns AS c1 ON fkc.parent_object_id = c1.object_id AND fkc.parent_column_id = c1.column_id
        WHERE fkc.constraint_object_id = fk.object_id
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS column_names,
      s2.name AS foreign_table_schema,
      t2.name AS foreign_table_name,
      STUFF((
        SELECT ',' + c2.name
        FROM sys.foreign_key_columns AS fkc
        JOIN sys.columns AS c2 ON fkc.referenced_object_id = c2.object_id AND fkc.referenced_column_id = c2.column_id
        WHERE fkc.constraint_object_id = fk.object_id
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '') AS foreign_column_names,
      fk.type_desc AS constraint_type,
      fk.delete_referential_action_desc AS on_delete,
      fk.update_referential_action_desc AS on_update
    FROM sys.foreign_keys AS fk
    JOIN sys.tables AS t ON fk.parent_object_id = t.object_id
    JOIN sys.schemas AS s ON t.schema_id = s.schema_id
    JOIN sys.tables AS t2 ON fk.referenced_object_id = t2.object_id
    JOIN sys.schemas AS s2 ON t2.schema_id = s2.schema_id
    WHERE s.name NOT IN ('sys', 'information_schema')
    ${buildSchemaQuery('s.name', schemas)}
    ORDER BY
      s.name,
      t.name;
  `;
  const refsQueryResult = await client.query(refsListSql);

  // Two endpoint lists describe the same relationship when every endpoint
  // matches on table, schema and the ordered field-name list.
  const endpointsEqual = (ep1: RefEndpoint[], ep2: RefEndpoint[]): boolean => {
    if (ep1.length !== ep2.length) return false;
    for (let i = 0; i < ep1.length; i += 1) {
      const a = ep1[i];
      const b = ep2[i];
      if (a.tableName !== b.tableName || a.schemaName !== b.schemaName) return false;
      if (a.fieldNames.length !== b.fieldNames.length) return false;
      for (let j = 0; j < a.fieldNames.length; j += 1) {
        if (a.fieldNames[j] !== b.fieldNames[j]) return false;
      }
    }
    return true;
  };

  const refs: Ref[] = [];
  for (const refRow of refsQueryResult.recordset) {
    // Child side: the table holding the FK ("many" end).
    const childEndpoint: RefEndpoint = {
      tableName: refRow.table_name,
      schemaName: refRow.table_schema,
      fieldNames: refRow.column_names.split(','),
      relation: '*',
    };
    // Parent side: the referenced table ("one" end).
    const parentEndpoint: RefEndpoint = {
      tableName: refRow.foreign_table_name,
      schemaName: refRow.foreign_table_schema,
      fieldNames: refRow.foreign_column_names.split(','),
      relation: '1',
    };
    const candidate = {
      name: refRow.fk_constraint_name,
      endpoints: [childEndpoint, parentEndpoint],
      // NO_ACTION is the engine default; model it as "no action specified".
      onDelete: refRow.on_delete === 'NO_ACTION' ? null : refRow.on_delete,
      onUpdate: refRow.on_update === 'NO_ACTION' ? null : refRow.on_update,
    };
    const alreadyCollected = refs.some((ref) => endpointsEqual(ref.endpoints, candidate.endpoints));
    if (!alreadyCollected) {
      refs.push(candidate);
    }
  }
  return refs;
};
/**
 * Loads index metadata for the selected schemas and splits it into:
 *  - `indexes`: secondary indexes, keyed by "schema.table"
 *  - `tableConstraints`: inline PK/UNIQUE flags, keyed by "schema.table"
 *    and then by column name.
 *
 * @param client - validated mssql connection pool
 * @param schemas - schema names to filter by (empty list means no filter)
 */
const generateIndexes = async (client: sql.ConnectionPool, schemas: string[]) => {
  // user_tables filters out system/diagnostic tables; index_info collects
  // each index with its comma-joined column list (FOR XML PATH) and whether
  // it backs a PRIMARY KEY (clustered, i.type = 1) or UNIQUE constraint.
  const indexListSql = `
    WITH user_tables AS (
      SELECT
        TABLE_SCHEMA,
        TABLE_NAME
      FROM
        INFORMATION_SCHEMA.TABLES
      WHERE
        TABLE_TYPE = 'BASE TABLE' -- Ensure we are only getting base tables
        AND TABLE_NAME NOT LIKE 'dt%'
        AND TABLE_NAME NOT LIKE 'syscs%'
        AND TABLE_NAME NOT LIKE 'sysss%'
        AND TABLE_NAME NOT LIKE 'sysrs%'
        AND TABLE_NAME NOT LIKE 'sysxlgc%'
    ),
    index_info AS (
      SELECT
        SCHEMA_NAME(t.schema_id) AS table_schema, -- Add schema information
        OBJECT_NAME(i.object_id) AS table_name,
        i.name AS index_name,
        i.is_unique,
        CASE
          WHEN i.type = 1 THEN 1
          ELSE 0
        END AS is_primary,
        i.type_desc AS index_type,
        STUFF((
          SELECT
            ', ' + c.name
          FROM
            sys.index_columns ic
            JOIN sys.columns c ON ic.column_id = c.column_id AND ic.object_id = c.object_id
          WHERE
            ic.index_id = i.index_id
            AND ic.object_id = i.object_id
            AND OBJECT_NAME(ic.object_id) IN (SELECT TABLE_NAME FROM user_tables) -- Filter for user tables
          ORDER BY
            ic.key_ordinal
          FOR XML PATH('')
        ), 1, 2, '') AS columns,
        CASE
          WHEN i.type = 1 THEN 'PRIMARY KEY'
          WHEN i.is_unique = 1 THEN 'UNIQUE'
          ELSE NULL
        END AS constraint_type
      FROM
        sys.indexes i
        JOIN sys.tables t ON i.object_id = t.object_id
      WHERE
        t.is_ms_shipped = 0
        AND i.type <> 0
    )
    SELECT
      ut.TABLE_SCHEMA AS table_schema,
      ut.TABLE_NAME AS table_name,
      ii.index_name,
      ii.is_unique,
      ii.is_primary,
      ii.index_type,
      ii.columns,
      ii.constraint_type
    FROM
      user_tables ut
      LEFT JOIN
      index_info ii ON ut.TABLE_NAME = ii.table_name
      AND ut.TABLE_SCHEMA = ii.table_schema
    WHERE
      ii.columns IS NOT NULL
      ${buildSchemaQuery('ut.TABLE_SCHEMA', schemas)}
    ORDER BY
      ut.TABLE_NAME,
      ii.constraint_type,
      ii.index_name;
  `;
  const indexListResult = await client.query(indexListSql);
  // Split rows: PK/UNIQUE constraints are rendered inline on columns,
  // everything else becomes a standalone index entry.
  const { outOfLineConstraints, inlineConstraints } = indexListResult.recordset.reduce((acc, row) => {
    const { constraint_type, columns } = row;
    // NOTE(review): the first comparison guards against the literal string
    // 'null' rather than a null value — verify this matches what the driver
    // actually returns for empty column lists.
    if (columns === 'null' || columns.trim() === '') return acc;
    if (constraint_type === 'PRIMARY KEY' || constraint_type === 'UNIQUE') {
      acc.inlineConstraints.push(row);
    } else {
      acc.outOfLineConstraints.push(row);
    }
    return acc;
  }, { outOfLineConstraints: [], inlineConstraints: [] });
  // Build the "schema.table" -> index list dictionary.
  const indexes = outOfLineConstraints.reduce((acc: IndexesDictionary, indexRow: Record<string, any>) => {
    const {
      table_schema,
      table_name,
      index_name,
      index_type,
      columns,
      // NOTE(review): `expressions` is never selected by the query above, so
      // it is always undefined here — confirm whether expression indexes
      // were meant to be supported.
      expressions,
    } = indexRow;
    const indexColumns = columns.split(',').map((column: string) => {
      return {
        type: 'column',
        value: column.trim(),
      };
    });
    const indexExpressions = expressions ? expressions.split(',').map((expression: string) => {
      return {
        type: 'expression',
        value: expression,
      };
    }) : [];
    const index = {
      name: index_name,
      type: index_type,
      columns: [
        ...indexColumns,
        ...indexExpressions,
      ],
    };
    const key = `${table_schema}.${table_name}`;
    if (acc[key]) {
      acc[key].push(index);
    } else {
      acc[key] = [index];
    }
    return acc;
  }, {});
  // Build the per-column pk/unique flag dictionary. A column that is part of
  // the primary key is not additionally flagged as unique.
  const tableConstraints = inlineConstraints.reduce((acc: TableConstraintsDictionary, row: Record<string, any>) => {
    const {
      table_schema,
      table_name,
      columns,
      constraint_type,
    } = row;
    const key = `${table_schema}.${table_name}`;
    if (!acc[key]) acc[key] = {};
    const columnNames = columns.split(',').map((column: string) => column.trim());
    columnNames.forEach((columnName: string) => {
      if (!acc[key][columnName]) acc[key][columnName] = {};
      if (constraint_type === 'PRIMARY KEY') {
        acc[key][columnName].pk = true;
      }
      if (constraint_type === 'UNIQUE' && !acc[key][columnName].pk) {
        acc[key][columnName].unique = true;
      }
    });
    return acc;
  }, {});
  return {
    indexes,
    tableConstraints,
  };
};
const fetchSchemaJson = async (connection: string): Promise<DatabaseSchema> => {
const { connectionString, schemas } = parseConnectionString(connection, 'odbc');
const client = await getValidatedClient(connectionString);
const tablesFieldsAndEnumsRes = generateTablesFieldsAndEnums(client, schemas);
const indexesRes = generateIndexes(client, schemas);
const refsRes = generateRefs(client, schemas);
const res = await Promise.all([
tablesFieldsAndEnumsRes,
indexesRes,
refsRes,
]);
client.close();
const { tables, fields, enums } = res[0];
const { indexes, tableConstraints } = res[1];
const refs = res[2];
return {
tables,
fields,
enums,
refs,
indexes,
tableConstraints,
};
};
export { fetchSchemaJson };