@dbml/connector
Version:
This package was created to fetch the schema JSON from many kinds of databases.
385 lines (325 loc) • 11 kB
text/typescript
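A minimal usage sketch, assuming `fetchSchemaJson` is re-exported from the package entry point; the key file path is a placeholder for a real BigQuery service-account key:

import { fetchSchemaJson } from '@dbml/connector';

async function main() {
  // './bigquery-key.json' is a hypothetical path to a service-account key file.
  const schema = await fetchSchemaJson('./bigquery-key.json');
  console.log(`Fetched ${schema.tables.length} tables`);
}

main().catch(console.error);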
import { BigQuery } from '@google-cloud/bigquery';
import { loadCredentialFromFile, parseBigQueryCredential } from '../utils/credential-loader';
import {
  getIntersection,
  getTableSchemaKey,
  mergeFieldDictionary,
  mergeIndexDictionary,
  mergeTableConstraintDictionary,
  mergeTables,
} from '../utils/helpers';
import {
  BigQueryCredentials,
  DatabaseSchema,
  DefaultInfo,
  DefaultType,
  Field,
  FieldsDictionary,
  Index,
  IndexesDictionary,
  Table,
  TableConstraintsDictionary,
} from './types';
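// Illustrative BigQueryCredentials shape (values are placeholders; the actual
// object is produced by parseBigQueryCredential in ../utils/credential-loader):
// {
//   projectId: 'my-project',
//   credentials: { privateKey: '-----BEGIN PRIVATE KEY-----\n...', clientEmail: 'svc@my-project.iam.gserviceaccount.com' },
//   datasets: ['sales', 'marketing'],
// }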
async function connectBigQuery(credential: BigQueryCredentials): Promise<BigQuery> {
  const client = new BigQuery({
    projectId: credential.projectId,
    credentials: {
      private_key: credential.credentials.privateKey,
      client_email: credential.credentials.clientEmail,
    },
  });
  try {
    // Run a trivial query to verify that the credentials can actually reach BigQuery.
    const query = `SELECT CURRENT_DATE() as today`;
    await client.query(query);
    return client;
  } catch (error) {
    if (error instanceof Error) {
      throw new Error(`BigQuery connection error: ${error.message}`);
    }
    throw error;
  }
}
function getDbDefault(
  columnDefault: string,
  defaultValueType: DefaultType | null
): DefaultInfo | null {
  if (defaultValueType === null) {
    return null;
  }
  if (defaultValueType === 'string') {
    // BigQuery keeps the enclosing double or single quotes as part of the string default: "string" or 'string'.
    // Strip them, then escape any remaining single quotes.
    return { value: columnDefault.slice(1, -1).replaceAll("'", "\\'"), type: defaultValueType };
  }
  return { value: columnDefault, type: defaultValueType };
}
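// e.g. getDbDefault("'active'", 'string') -> { value: 'active', type: 'string' },
// while getDbDefault('CURRENT_TIMESTAMP()', 'expression') passes the value through unchanged.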
function generateField(row: any): Field {
  const {
    columnName,
    isNullable,
    dataType,
    columnDefault,
    columnDefaultType,
    columnDescription,
  } = row;
  const fieldDefaultValue = getDbDefault(columnDefault, columnDefaultType);
  const field: Field = {
    name: columnName,
    not_null: isNullable === 'NO',
    type: {
      type_name: dataType,
      schemaName: null,
    },
    note: {
      value: columnDescription || '',
    },
    dbdefault: fieldDefaultValue,
    increment: false, // BigQuery does not support native auto-increment
  };
  return field;
}
async function generateTablesAndFields(
  client: BigQuery,
  projectId: string,
  datasetId: string
): Promise<{
  tables: Table[];
  fieldMap: FieldsDictionary;
}> {
  const tableName = `${projectId}.${datasetId}`;
  // Table descriptions live in TABLE_OPTIONS; column descriptions in COLUMN_FIELD_PATHS
  // (filtered to top-level columns, where column_name equals field_path).
  const query = `
    with
    table_comments as (
      select table_schema, table_name, option_value as description
      from ${tableName}.INFORMATION_SCHEMA.TABLE_OPTIONS
      where option_name = 'description'
    ),
    column_comments as (
      select column_name, table_schema, table_name, description
      from ${tableName}.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS
      where column_name = field_path
    )
    select
      t.table_schema as tableSchema,
      t.table_name as tableName,
      (select description from table_comments tc where tc.table_name = t.table_name and tc.table_schema = t.table_schema) as tableDescription,
      (
        select description
        from column_comments cc
        where cc.table_name = c.table_name and cc.table_schema = c.table_schema and cc.column_name = c.column_name
      ) as columnDescription,
      c.column_name as columnName,
      c.is_nullable as isNullable,
      c.data_type as dataType,
      c.column_default as columnDefault,
      CASE
        WHEN SAFE_CAST(c.column_default AS FLOAT64) IS NOT NULL THEN 'number'
        WHEN SAFE_CAST(c.column_default AS BOOLEAN) IS NOT NULL THEN 'boolean'
        -- string defaults are enclosed in single or double quotes
        WHEN REGEXP_CONTAINS(c.column_default, r"^['\\"].*['\\"]$") THEN 'string'
        WHEN c.column_default = "NULL" THEN NULL
        ELSE 'expression'
      END AS columnDefaultType
    from ${tableName}.INFORMATION_SCHEMA.TABLES t
    join ${tableName}.INFORMATION_SCHEMA.COLUMNS c
      on
        t.table_schema = c.table_schema
        and t.table_name = c.table_name
    where t.table_type = 'BASE TABLE'
    order by t.creation_time, c.ordinal_position;
  `;
  const [queryResult] = await client.query({ query });
  const tableMap: Record<string, Table> = {};
  const fieldMap: FieldsDictionary = {};
  queryResult.forEach((row) => {
    // Each row is one column; group rows into tables by their schema-qualified key.
    const { tableSchema, tableName, tableDescription } = row;
    const key = getTableSchemaKey(tableSchema, tableName);
    if (!tableMap[key]) {
      tableMap[key] = {
        name: tableName,
        note: { value: tableDescription || '' },
        schemaName: tableSchema,
      };
    }
    if (!fieldMap[key]) {
      fieldMap[key] = [];
    }
    const field = generateField(row);
    fieldMap[key].push(field);
  });
  return {
    tables: Object.values(tableMap),
    fieldMap,
  };
}
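// Illustrative result shape (the key format is whatever getTableSchemaKey produces;
// 'dataset.users' is a placeholder):
// tables:   [{ name: 'users', note: { value: '' }, schemaName: 'dataset' }]
// fieldMap: { 'dataset.users': [{ name: 'id', not_null: true, ... }] }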
async function generateIndexes(
  client: BigQuery,
  projectId: string,
  datasetId: string
): Promise<IndexesDictionary> {
  const tableName = `${projectId}.${datasetId}`;
  // This connector reads search indexes from INFORMATION_SCHEMA.SEARCH_INDEXES;
  // the where clause keeps top-level columns only (field path equals column name).
  const query = `
    select
      s.index_schema as indexSchema,
      s.table_name as tableName,
      s.index_name as indexName,
      string_agg(sc.index_column_name, ', ') as indexColumnNames
    from ${tableName}.INFORMATION_SCHEMA.SEARCH_INDEXES s
    join ${tableName}.INFORMATION_SCHEMA.SEARCH_INDEX_COLUMNS sc
      on
        s.index_schema = sc.index_schema
        and s.table_name = sc.table_name
        and s.index_name = sc.index_name
    where sc.index_column_name = sc.index_field_path
    group by s.index_schema, s.table_name, s.index_name
    order by s.table_name, s.index_name;
  `;
  const [queryResult] = await client.query(query);
  const indexMap: IndexesDictionary = {};
  queryResult.forEach((row) => {
    const { indexSchema, tableName, indexName, indexColumnNames } = row;
    const indexColumns = indexColumnNames.split(', ').map((column: string) => {
      return {
        type: 'column',
        value: column,
      };
    });
    const index: Index = {
      name: indexName,
      type: '', // search indexes do not map to a conventional index type
      columns: [...indexColumns],
    };
    const key = getTableSchemaKey(indexSchema, tableName);
    if (indexMap[key]) {
      indexMap[key].push(index);
    } else {
      indexMap[key] = [index];
    }
  });
  return indexMap;
}
async function generateTableConstraints(
  client: BigQuery,
  projectId: string,
  datasetId: string
): Promise<{
  inlinePrimaryKeyMap: TableConstraintsDictionary;
  compositePrimaryKeyMap: IndexesDictionary;
}> {
  const tableName = `${projectId}.${datasetId}`;
  const query = `
    select
      tc.table_schema as tableSchema,
      tc.table_name as tableName,
      tc.constraint_name as constraintName,
      string_agg(kcu.column_name, ', ') as columnNames,
      count(kcu.column_name) as columnCount
    from ${tableName}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
    join ${tableName}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
      on
        tc.table_schema = kcu.table_schema
        and tc.table_name = kcu.table_name
        and tc.constraint_name = kcu.constraint_name
    where tc.constraint_type = 'PRIMARY KEY'
    group by tc.table_schema, tc.table_name, tc.constraint_name
    order by tc.table_name, tc.constraint_name;
  `;
  const [queryResult] = await client.query(query);
  // Single-column primary keys are emitted as inline column settings;
  // multi-column primary keys become composite pk indexes.
  const inlinePrimaryKeyList = queryResult.filter(
    (row) => row.columnCount && row.columnCount === 1
  );
  const compositePrimaryKeyList = queryResult.filter(
    (row) => row.columnCount && row.columnCount > 1
  );
  const inlinePrimaryKeyMap: TableConstraintsDictionary = {};
  inlinePrimaryKeyList.forEach((inlineKey) => {
    const { tableSchema, tableName, columnNames } = inlineKey;
    const key = getTableSchemaKey(tableSchema, tableName);
    if (!inlinePrimaryKeyMap[key]) {
      inlinePrimaryKeyMap[key] = {};
    }
    if (!inlinePrimaryKeyMap[key][columnNames]) {
      inlinePrimaryKeyMap[key][columnNames] = {};
    }
    inlinePrimaryKeyMap[key][columnNames].pk = true;
  });
  const compositePrimaryKeyMap: IndexesDictionary = {};
  compositePrimaryKeyList.forEach((compositeKey) => {
    const { tableSchema, tableName, constraintName, columnNames } = compositeKey;
    const indexColumns = columnNames.split(', ').map((column: string) => {
      return {
        type: 'column',
        value: column,
      };
    });
    const index: Index = {
      name: constraintName,
      type: '',
      columns: [...indexColumns],
      pk: true,
    };
    const key = getTableSchemaKey(tableSchema, tableName);
    if (compositePrimaryKeyMap[key]) {
      compositePrimaryKeyMap[key].push(index);
    } else {
      compositePrimaryKeyMap[key] = [index];
    }
  });
  return {
    inlinePrimaryKeyMap,
    compositePrimaryKeyMap,
  };
}
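// Illustrative shapes (keys and names are placeholders):
// inlinePrimaryKeyMap:    { 'dataset.users': { id: { pk: true } } }
// compositePrimaryKeyMap: { 'dataset.orders': [{ name: 'orders_pk', type: '', pk: true, columns: [...] }] }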
async function validateDatasetIdList(client: BigQuery, datasetIdList: string[]): Promise<string[]> {
  const [datasets] = await client.getDatasets();
  const rawDatasetIdList = datasets.map((dataset) => dataset.id).filter((id) => id) as string[];
  // If the user does not specify which datasets to fetch, fetch all of them.
  if (datasetIdList.length === 0) {
    return rawDatasetIdList;
  }
  return getIntersection(datasetIdList, rawDatasetIdList);
}
async function fetchSchemaJsonByDataset(
  client: BigQuery,
  projectId: string,
  datasetId: string
): Promise<DatabaseSchema> {
  // Run the three INFORMATION_SCHEMA queries for this dataset concurrently.
  const [
    { tables, fieldMap },
    indexMap,
    { inlinePrimaryKeyMap, compositePrimaryKeyMap },
  ] = await Promise.all([
    generateTablesAndFields(client, projectId, datasetId),
    generateIndexes(client, projectId, datasetId),
    generateTableConstraints(client, projectId, datasetId),
  ]);
  return {
    tables,
    fields: fieldMap,
    refs: [],
    enums: [],
    indexes: mergeIndexDictionary(indexMap, compositePrimaryKeyMap),
    tableConstraints: inlinePrimaryKeyMap,
  };
}
async function fetchSchemaJson(keyFilename: string): Promise<DatabaseSchema> {
  const rawCredential = await loadCredentialFromFile(keyFilename);
  const credentialJson = parseBigQueryCredential(rawCredential);
  const client = await connectBigQuery(credentialJson);
  const projectId = await client.getProjectId();
  const datasetIdList = await validateDatasetIdList(client, credentialJson.datasets);
  const querySchemaPromises = datasetIdList.map((dataset) =>
    fetchSchemaJsonByDataset(client, projectId, dataset)
  );
  const databaseSchemaRes = await Promise.all(querySchemaPromises);
  // Merge the per-dataset schemas into a single DatabaseSchema.
  const res = databaseSchemaRes.reduce(
    (acc, dbSchema) => {
      const { tables, fields, indexes, tableConstraints } = dbSchema;
      acc.tables = mergeTables(acc.tables, tables);
      acc.fields = mergeFieldDictionary(acc.fields, fields);
      acc.indexes = mergeIndexDictionary(acc.indexes, indexes);
      acc.tableConstraints = mergeTableConstraintDictionary(acc.tableConstraints, tableConstraints);
      return acc;
    },
    { tables: [], fields: {}, tableConstraints: {}, indexes: {}, refs: [], enums: [] }
  );
  return res;
}
export { fetchSchemaJson };