/*
 * jsonschema2ddl
 * Convert JSON Schema to DDL
 */
import * as jsonschema from 'jsonschema';
import fetch from 'node-fetch';
import { FKColumn, Column, Table } from './models';
import { COLUMNS_TYPES_PREFERENCE } from './types';
import { db_column_name, db_table_name, get_one_schema } from './utils';
/**
 * Optional settings accepted by the `JSONSchemaToDatabase` constructor.
 * Every member may be omitted.
 */
interface JSONSchemaToDatabaseParams {
  /** Name used for the table generated from the top-level schema. */
  root_table_name?: string;
  /** Database schema (namespace) the tables are created in, if any. */
  db_schema_name?: string | undefined;
  /** SQL dialect identifier, e.g. "postgres" or "redshift". */
  database_flavor?: string;
  /** Log level label. */
  log_level?: string | undefined;
  // TODO: Implement extra columns
  extra_columns?: [];
  // TODO: Implement abbreviations
  abbreviations?: any;
}
/** Options accepted by `JSONSchemaToDatabase.create_tables`. */
interface CreateTablesParams {
  /** Drop the database schema (if one is configured) before creating it. */
  drop_schema?: boolean;
  /** Drop each table before creating it. */
  drop_tables?: boolean;
  /** Append CASCADE to the DROP statements. */
  drop_cascade?: boolean;
  /** Commit flag; accepted for API compatibility. */
  auto_commit?: boolean;
}
/** Options accepted by `JSONSchemaToDatabase.create_links`. */
interface CreateLinksParams {
  /** Commit flag; accepted for API compatibility. */
  auto_commit?: boolean;
}
/**
 * Translates a JSON Schema document into table definitions and the SQL
 * statements that create them.
 *
 * Typical usage: construct an instance, call {@link create_tables} to create
 * all tables, then {@link create_links} to add the foreign keys between them.
 * Optionally call {@link analyze} afterwards. {@link generate_ddl} returns the
 * CREATE TABLE statements as text without touching a database.
 *
 * The metaschema check is asynchronous and is started (not awaited) by the
 * constructor; await {@link schema_validation} to find out whether it passed.
 */
export class JSONSchemaToDatabase {
  /** The JSON Schema document being translated. */
  schema: any;
  /** SQL dialect identifier handed to every Table/Column that is created. */
  database_flavor: string;
  /** Database schema (namespace) the tables live in, if any. */
  db_schema_name: string | undefined;
  /** Abbreviations for columns. TODO: not implemented yet. */
  abbreviations: any;
  /** Extra columns. TODO: not implemented yet. */
  extra_columns: [];
  /** Root table name as returned by `db_table_name`. */
  root_table_name?: string;
  /** Log level label. */
  log_level: string | undefined;
  /** Tables to create, keyed by their `ref`. */
  table_definitions: Record<string, Table>;
  /**
   * Settles when the metaschema validation started by the constructor has
   * finished; rejects if the metaschema could not be fetched or the schema
   * is invalid.
   */
  schema_validation: Promise<void>;

  /**
   * @param schema JSON Schema document to translate.
   * @param params Optional settings; see the defaults in the signature.
   */
  constructor(schema: any, {
    database_flavor = "postgres",
    db_schema_name,
    root_table_name = "root",
    log_level = "DEBUG",
    abbreviations = {},
    extra_columns = [],
  }: JSONSchemaToDatabaseParams = {}) {
    // NOTE: because `log_level` defaults to "DEBUG", the LOG_LEVEL environment
    // variable is only consulted when an empty string is passed explicitly.
    this.log_level = log_level || process.env["LOG_LEVEL"];
    this.schema = schema;
    this.database_flavor = database_flavor;
    this.db_schema_name = db_schema_name;
    this.root_table_name = db_table_name(root_table_name, this.db_schema_name);
    this.extra_columns = extra_columns;
    this.abbreviations = abbreviations;

    // A constructor cannot await. Keep the promise so callers can await it,
    // and attach a handler so a failure is reported instead of surfacing as
    // an unhandled promise rejection.
    this.schema_validation = this._validate_schema();
    this.schema_validation.catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Schema validation failed: ${reason}`);
    });

    this.table_definitions = this._create_table_definitions();
    console.log("Table definitions initialized");
  }

  /**
   * Validates the schema against the metaschema named by its `$schema` URL
   * (draft-07 when `$schema` is absent). The metaschema is downloaded with
   * `fetch`.
   *
   * @throws Error when the URL is malformed, the download fails, or the
   *   schema does not conform to the metaschema.
   */
  private async _validate_schema(): Promise<void> {
    const declared_uri = this.schema["$schema"] || "https://json-schema.org/draft-07/schema";
    const metaschema_uri = new URL(declared_uri).toString();
    const res = await fetch(metaschema_uri);
    if (!res.ok) {
      throw new Error(`Could not fetch metaschema ${metaschema_uri}: HTTP ${res.status}`);
    }
    // `json()` returns a promise; it must be awaited before validating.
    const meta_schema = await res.json();
    const result = jsonschema.validate(this.schema, meta_schema);
    if (!result.valid) {
      const details = result.errors.map((e) => String(e)).join("; ");
      throw new Error(`Schema is invalid: ${details}`);
    }
    console.log("Schema is valid");
  }

  /**
   * Builds the table definitions from `schema.definitions` plus a root table
   * for the top-level schema.
   *
   * @returns Tables keyed by their `ref`, each with its columns expanded.
   */
  private _create_table_definitions(): Record<string, Table> {
    // NOTE: first create empty tables to reference in columns later
    const table_definitions: Record<string, Table> = {};
    const columns_definitions: Record<string, Column> = {};
    const schema_definitions: Record<string, any> = this.schema["definitions"] || {};

    for (const [name, raw_schema] of Object.entries(schema_definitions)) {
      const ref = raw_schema["$id"] || `#/definitions/${name}`;
      // Definitions without a direct "type" are resolved through
      // get_one_schema (presumably picks one alternative of a composite
      // schema; confirm in ./utils).
      const object_schema = "type" in raw_schema ? raw_schema : get_one_schema(raw_schema);

      if (object_schema["type"] === "object") {
        const table = new Table({
          ref,
          database_flavor: this.database_flavor,
          name: db_table_name(name, this.db_schema_name),
          comment: object_schema["comment"],
          jsonschema_fields: object_schema,
        });
        table_definitions[table.ref] = table;
      } else {
        // NOTE: Create new column for main table
        // A recognised "format" takes precedence over the plain "type".
        let schema_type: string = object_schema["type"];
        if ("format" in object_schema && object_schema["format"] in COLUMNS_TYPES_PREFERENCE) {
          schema_type = object_schema["format"];
        }
        columns_definitions[ref] = new Column({
          name: db_column_name(name),
          database_flavor: this.database_flavor,
          jsonschema_type: schema_type,
          jsonschema_fields: object_schema,
        });
      }
    }

    const root_table = new Table({
      ref: "root",
      database_flavor: this.database_flavor,
      name: this.root_table_name || '',
      comment: this.schema["comment"] || "",
      jsonschema_fields: this.schema,
    });
    table_definitions[root_table.ref] = root_table;

    // Object.entries takes a snapshot, so reassigning entries while looping is safe.
    for (const [ref, table] of Object.entries(table_definitions)) {
      table_definitions[ref] =
        table.expand_columns({ table_definitions, columns_definitions });
    }
    return table_definitions;
  }

  /**
   * Wraps text in single quotes as an SQL string literal, doubling embedded
   * single quotes so the text cannot terminate the literal early.
   */
  private _quote_literal(text: string): string {
    return "'" + text.replace(/'/g, "''") + "'";
  }

  /** Builds the CREATE TABLE statement for one table. */
  private _create_table_sql(table: Table): string {
    const all_cols = table.columns.map(col => ` "${col.name}" ${col.data_type}`);
    // Interpolate the column *name*; interpolating the column object itself
    // would render as "[object Object]".
    const unique_cols = table.columns.filter(col => col.is_unique).map(col => `"${col.name}"`);
    // NOTE(review): the primary key name is emitted unquoted while column
    // names above are quoted; confirm the names never need quoting.
    return (
      `CREATE TABLE ${table.name} ( ` +
      `${all_cols.join(',')} ` +
      `${unique_cols.length ? ", UNIQUE(" + unique_cols.join(',') + ")" : ""} ` +
      `${table.primary_key ? ", PRIMARY KEY(" + table.primary_key.name + ")" : ""}); `
    );
  }

  /** Builds the COMMENT ON TABLE statement for one table. */
  private _table_comment_sql(table: Table): string {
    return `COMMENT ON TABLE ${table.name} IS ${this._quote_literal(String(table.comment))}`;
  }

  /** Builds the COMMENT ON COLUMN statement for one column of a table. */
  private _column_comment_sql(table: Table, col: Column): string {
    return `COMMENT ON COLUMN ${table.name}."${col.name}" IS ${this._quote_literal(String(col.comment))}`;
  }

  /**
   * Derives the foreign-key constraint name from the referenced table's name.
   *
   * Takes the second-to-last piece of the name split on double quotes
   * (presumably the bare table name of a quoted identifier; confirm against
   * `db_table_name`). Falls back to the whole name when there is no such piece.
   */
  private _fk_constraint_name(referenced_table_name: string): string {
    const parts = referenced_table_name.split('"');
    const bare_name = parts[parts.length - 2] ?? referenced_table_name;
    return `fk_${bare_name}`;
  }

  /**
   * Helper method to execute and optionally log a query.
   *
   * @param conn Connection object; must expose `query(text, values)`.
   * @param query SQL text to execute.
   * @param args Values passed along with the query. Defaults to none.
   * @param query_ok_to_print Whether the query is logged first. Defaults to true.
   */
  private async _execute(conn: any, query: string, args: string[] = [], query_ok_to_print: boolean = true): Promise<void> {
    if (query_ok_to_print) {
      console.log(query);
    }
    await conn.query(query, args);
  }

  /**
   * Creates the tables for the schema.
   *
   * @param conn Connection object; must expose `query(text, values)`.
   * @param drop_schema Drop the database schema first if it exists. Defaults to false.
   * @param drop_tables Drop each table first if it exists. Defaults to false.
   * @param drop_cascade Execute drops with CASCADE. Defaults to true.
   * @param auto_commit Currently ignored (no commit is issued). Defaults to false.
   */
  async create_tables(
    conn: any,
    {
      drop_schema = false,
      drop_tables = false,
      drop_cascade = true,
      auto_commit = false
    }: CreateTablesParams = {}
  ): Promise<void> {
    console.log(`Creating tables in the schema ${this.db_schema_name}`);
    const drop_suffix = drop_cascade ? "CASCADE;" : ";";

    if (this.db_schema_name) {
      if (drop_schema) {
        console.log(`Dropping schema ${this.db_schema_name}!!`);
        await this._execute(conn, `DROP SCHEMA IF EXISTS ${this.db_schema_name} ${drop_suffix}`);
      }
      await this._execute(conn, `CREATE SCHEMA IF NOT EXISTS ${this.db_schema_name};`);
    }

    console.log(Object.keys(this.table_definitions));
    for (const [table_ref, table] of Object.entries(this.table_definitions)) {
      // FIXME: Move to a separate method
      console.log(`Trying to create table ${table.name}`);
      console.log(JSON.stringify(table_ref, null, 2));
      console.log(JSON.stringify(table, null, 2));

      if (drop_tables) {
        console.log(`Dropping table ${table.name}!!`);
        await this._execute(conn, `DROP TABLE IF EXISTS ${table.name} ${drop_suffix}`);
      }

      await this._execute(conn, this._create_table_sql(table));

      if (table.comment) {
        console.log(`Set the following comment on table ${table.name}: ${table.comment}`);
        await this._execute(conn, this._table_comment_sql(table));
      }
      for (const col of table.columns) {
        if (col.comment) {
          console.log(`Set the following comment on column ${col.name}: ${col.comment}`);
          await this._execute(conn, this._column_comment_sql(table, col));
        }
      }
      console.log("Table created!");
    }
    // if (auto_commit) {
    //     conn.commit();
    // }
  }

  /**
   * Generates the DDL for all tables without executing anything.
   *
   * @param include_comments Also emit COMMENT ON statements. Defaults to false.
   * @returns The statements joined with ";" and a blank line.
   */
  async generate_ddl(include_comments: boolean = false): Promise<string> {
    console.log(`Creating tables in the schema ${this.db_schema_name}`);
    console.log(Object.keys(this.table_definitions));
    const ddl: string[] = [];

    for (const [table_ref, table] of Object.entries(this.table_definitions)) {
      // FIXME: Move to a separate method
      console.log(`Trying to create table ${table.name}`);
      console.log(JSON.stringify(table_ref, null, 2));
      console.log(JSON.stringify(table, null, 2));

      ddl.push(this._create_table_sql(table));

      if (include_comments) {
        if (table.comment) {
          ddl.push(this._table_comment_sql(table));
        }
        for (const col of table.columns) {
          if (col.comment) {
            console.log(`Set the following comment on column ${col.name}: ${col.comment}`);
            ddl.push(this._column_comment_sql(table, col));
          }
        }
      }
      console.log("Table created!");
    }
    return ddl.join(';\n\n');
  }

  /**
   * Adds foreign keys between tables.
   *
   * @param conn Connection object; must expose `query(text, values)`.
   * @param auto_commit Currently ignored (no commit is issued). Defaults to true.
   */
  async create_links(conn: any, { auto_commit = true }: CreateLinksParams = {}): Promise<void> {
    for (const table of Object.values(this.table_definitions)) {
      const columns = table.columns as FKColumn[];
      for (const col of columns) {
        if (!col.is_fk) {
          continue;
        }
        // NOTE(review): the constraint name depends only on the referenced
        // table, so two foreign keys from one table to the same target would
        // get the same name.
        const fk_q = (
          `ALTER TABLE ${table.name} ` +
          `ADD CONSTRAINT ${this._fk_constraint_name(col.table_ref.name)} ` +
          `FOREIGN KEY (${col.name}) ` +
          `REFERENCES ${col.table_ref.name} (${col.table_ref.primary_key?.name}); `
        );
        await this._execute(conn, fk_q);
        // if (auto_commit) {
        //     conn.commit();
        // }
      }
    }
  }

  /**
   * Runs `ANALYZE` on each table.
   * See the Postgres documentation for ANALYZE:
   * https://www.postgresql.org/docs/9.1/static/sql-analyze.html
   *
   * @param conn Connection object; must expose `query(text, values)`.
   */
  async analyze(conn: any): Promise<void> {
    console.log("Analyzing tables...");
    for (const table of Object.values(this.table_definitions)) {
      console.log(`Launch analyze for ${table.name}`);
      await this._execute(conn, "ANALYZE " + table.name);
    }
  }
}
/**
 * Shorthand for `new JSONSchemaToDatabase(schema, { ...params, database_flavor: "postgres" })`.
 *
 * Exported so it can be instantiated from other modules, like
 * `JSONSchemaToRedshift`. Any `database_flavor` given in `params` is
 * overridden with "postgres".
 */
export class JSONSchemaToPostgres extends JSONSchemaToDatabase {
  /**
   * @param schema JSON Schema document to translate.
   * @param params Optional settings forwarded to the base class.
   */
  constructor(schema: any, params: JSONSchemaToDatabaseParams = {}) {
    super(schema, { ...params, database_flavor: "postgres" });
  }
}
/**
 * Convenience subclass that always uses the "redshift" database flavor.
 *
 * Any `database_flavor` given in `params` is overridden.
 */
export class JSONSchemaToRedshift extends JSONSchemaToDatabase {
  /**
   * @param schema JSON Schema document to translate.
   * @param params Optional settings forwarded to the base class.
   */
  constructor(schema: any, params: JSONSchemaToDatabaseParams = {}) {
    super(schema, Object.assign({}, params, { database_flavor: "redshift" }));
  }
}