UNPKG

jsonschema2ddl

Version:
353 lines (328 loc) 12.4 kB
import * as jsonschema from 'jsonschema';
import fetch from 'node-fetch';

import { FKColumn, Column, Table } from './models';
import { COLUMNS_TYPES_PREFERENCE } from './types';
import { db_column_name, db_table_name, get_one_schema } from './utils';

/** Options accepted by the {@link JSONSchemaToDatabase} constructor. */
interface JSONSchemaToDatabaseParams {
    database_flavor?: string;
    db_schema_name?: string | undefined;
    root_table_name?: string;
    log_level?: string | undefined;
    abbreviations?: any; // TODO: Implement abbreviations
    extra_columns?: []; // TODO: Implement extra columns
}

/** Options accepted by {@link JSONSchemaToDatabase.create_tables}. */
interface CreateTablesParams {
    drop_schema?: boolean;
    drop_tables?: boolean;
    drop_cascade?: boolean;
    auto_commit?: boolean;
}

/** Options accepted by {@link JSONSchemaToDatabase.create_links}. */
interface CreateLinksParams {
    auto_commit?: boolean;
}

/**
 * Escapes a value for embedding inside a single-quoted SQL string literal
 * by doubling every single quote.
 *
 * NOTE(review): this covers standard-conforming string literals only;
 * backslash handling depends on the server configuration — confirm for the
 * targeted flavors.
 */
function escape_sql_literal(value: unknown): string {
    return String(value).replace(/'/g, "''");
}

/**
 * Derives the suffix used in a foreign-key constraint name from a table name.
 *
 * For a quoted name such as `"schema"."table"` the split on `"` yields
 * `['', 'schema', '.', 'table', '']`, so the second-to-last piece is the bare
 * table name. Unquoted names fall back to the segment after the last dot.
 */
function fk_constraint_suffix(table_name: string): string {
    const quoted_parts = table_name.split('"');
    const from_quotes = quoted_parts[quoted_parts.length - 2];
    if (from_quotes) {
        return from_quotes;
    }
    const dotted_parts = table_name.split('.');
    return dotted_parts[dotted_parts.length - 1] || table_name;
}

/**
 * JSONSchemaToDatabase is the base class that translates a JSON schema into
 * table definitions and DDL statements.
 *
 * Typically you instantiate a `JSONSchemaToPostgres` object and run
 * `create_tables` to create all the tables. Run `create_links` afterwards to
 * add foreign keys between tables. Optionally, run `analyze` at the end.
 *
 * Attributes:
 *     schema: the schema to translate to tables.
 *     database_flavor: the flavor of the db. One of Postgres or Redshift.
 *     db_schema_name: the name of the schema in the database to create the tables.
 *     root_table_name: name of the root table for the schema.
 *     abbreviations: dictionary of abbreviations for columns (not implemented yet).
 *     extra_columns: list of extra columns (not implemented yet).
 *     log_level: log level of the deployment. Falls back to the `LOG_LEVEL`
 *         environment variable and then to 'DEBUG'.
 *     table_definitions: table objects keyed by their reference.
 */
export class JSONSchemaToDatabase {
    schema: any;
    database_flavor: string;
    db_schema_name: string | undefined;
    abbreviations: any;
    extra_columns: [];
    root_table_name?: string;
    log_level: string | undefined;
    table_definitions: Record<string, Table>;

    /**
     * Builds the table definitions for `schema` and starts validating the
     * schema against its metaschema in the background.
     *
     * @param schema - the JSON schema to translate.
     * @param params - optional settings; see the class attributes.
     */
    constructor(
        schema: any,
        {
            database_flavor = "postgres",
            db_schema_name,
            root_table_name = "root",
            log_level,
            abbreviations = {},
            extra_columns = [],
        }: JSONSchemaToDatabaseParams = {},
    ) {
        // An explicit value wins, then the environment, then the documented default.
        this.log_level = log_level || process.env["LOG_LEVEL"] || "DEBUG";
        this.schema = schema;
        this.database_flavor = database_flavor;
        this.db_schema_name = db_schema_name;
        this.root_table_name = db_table_name(root_table_name, this.db_schema_name);
        this.extra_columns = extra_columns;
        this.abbreviations = abbreviations;

        // A constructor cannot await, so validation runs in the background.
        // Failures are logged rather than left as an unhandled rejection.
        this._validate_schema().catch((err: unknown) => {
            console.error("Schema validation failed:", err);
        });

        this.table_definitions = this._create_table_definitions();
        console.log("Table definitions initialized");
    }

    /**
     * Validates the schema itself against the metaschema named by its
     * `$schema` url (draft-07 when absent).
     * Currently, some redirections are not supported.
     *
     * @throws Error when the metaschema cannot be fetched or the schema is invalid.
     */
    private async _validate_schema(): Promise<void> {
        const metaschema_uri = new URL(
            this.schema["$schema"] || "https://json-schema.org/draft-07/schema",
        ).toString();

        const res = await fetch(metaschema_uri);
        if (!res.ok) {
            throw new Error(`Could not fetch metaschema ${metaschema_uri}: HTTP ${res.status}`);
        }

        // The body must be awaited; passing the pending Promise to the
        // validator would make every schema look valid.
        const meta_schema = (await res.json()) as jsonschema.Schema;
        const result = jsonschema.validate(this.schema, meta_schema);
        if (!result.valid) {
            const details = result.errors.map(err => err.stack).join("; ");
            throw new Error(`Schema is invalid: ${details}`);
        }
        console.log("Schema is valid");
    }

    /**
     * Creates the table definitions.
     *
     * Every entry under `definitions` whose type is `object` becomes a table;
     * every other entry becomes a reusable column definition. A table with
     * ref `root` is added for the schema itself, and finally each table
     * expands its columns against the collected definitions.
     *
     * @returns a dictionary with table refs and the table objects to create.
     */
    private _create_table_definitions(): Record<string, Table> {
        // NOTE: first create empty tables to reference in columns later
        const table_definitions: Record<string, Table> = {};
        const columns_definitions: Record<string, Column> = {};
        const schema_definitions: Record<string, any> = this.schema["definitions"] || {};

        for (const [name, raw_schema] of Object.entries(schema_definitions)) {
            const ref = raw_schema["$id"] || `#/definitions/${name}`;
            const object_schema = "type" in raw_schema ? raw_schema : get_one_schema(raw_schema);

            if (object_schema["type"] === "object") {
                const table = new Table({
                    ref,
                    database_flavor: this.database_flavor,
                    name: db_table_name(name, this.db_schema_name),
                    comment: object_schema["comment"],
                    jsonschema_fields: object_schema,
                });
                table_definitions[table.ref] = table;
            } else {
                // NOTE: Create new column for main table
                let schema_type: string = object_schema["type"];
                // A known `format` takes precedence over the plain type.
                if ("format" in object_schema && object_schema["format"] in COLUMNS_TYPES_PREFERENCE) {
                    schema_type = object_schema["format"];
                }
                columns_definitions[ref] = new Column({
                    name: db_column_name(name),
                    database_flavor: this.database_flavor,
                    jsonschema_type: schema_type,
                    jsonschema_fields: object_schema,
                });
            }
        }

        const root_table = new Table({
            ref: "root",
            database_flavor: this.database_flavor,
            name: this.root_table_name || '',
            comment: this.schema["comment"] || "",
            jsonschema_fields: this.schema,
        });
        table_definitions[root_table.ref] = root_table;

        for (const [ref, table] of Object.entries(table_definitions)) {
            table_definitions[ref] = table.expand_columns({ table_definitions, columns_definitions });
        }

        return table_definitions;
    }

    /**
     * Builds the `CREATE TABLE` statement for one table: all columns, an
     * optional `UNIQUE` clause over the unique columns, and an optional
     * `PRIMARY KEY` clause.
     */
    private _build_create_table_sql(table: Table): string {
        const all_cols = table.columns.map(col => ` "${col.name}" ${col.data_type}`);
        // Use the column *name*; interpolating the column object itself
        // would render as "[object Object]".
        const unique_cols = table.columns
            .filter(col => col.is_unique)
            .map(col => `"${col.name}"`);

        const unique_clause = unique_cols.length ? `, UNIQUE(${unique_cols.join(',')})` : "";
        const pk_clause = table.primary_key ? `, PRIMARY KEY(${table.primary_key.name})` : "";

        return `CREATE TABLE ${table.name} ( ${all_cols.join(',')} ${unique_clause} ${pk_clause}); `;
    }

    /**
     * Helper method to execute and debug a query.
     *
     * @param conn - connection object; only its `query(query, args)` method is used.
     * @param query - query to execute.
     * @param args - list of arguments for the query. Defaults to an empty list.
     * @param query_ok_to_print - whether to log the query before running it. Defaults to true.
     */
    private async _execute(conn: any, query: string, args: string[] = [], query_ok_to_print: boolean = true) {
        if (query_ok_to_print) {
            console.log(query);
        }
        await conn.query(query, args);
    }

    /**
     * Create the tables for the schema.
     *
     * @param conn - connection object to the db.
     * @param drop_schema - whether or not to drop the schema if it exists. Defaults to false.
     * @param drop_tables - whether or not to drop the tables if they exist. Defaults to false.
     * @param drop_cascade - execute drops with cascade. Defaults to true.
     * @param auto_commit - accepted for compatibility; committing is currently disabled.
     */
    async create_tables(
        conn: any,
        {
            drop_schema = false,
            drop_tables = false,
            drop_cascade = true,
            auto_commit = false
        }: CreateTablesParams = {}
    ) {
        console.log(`Creating tables in the schema ${this.db_schema_name}`);
        const drop_suffix = drop_cascade ? "CASCADE;" : ";";

        if (this.db_schema_name) {
            if (drop_schema) {
                console.log(`Dropping schema ${this.db_schema_name}!!`);
                await this._execute(
                    conn,
                    `DROP SCHEMA IF EXISTS ${this.db_schema_name} ${drop_suffix}`,
                );
            }
            await this._execute(conn, `CREATE SCHEMA IF NOT EXISTS ${this.db_schema_name};`);
        }

        console.log(Object.keys(this.table_definitions));
        for (const [table_ref, table] of Object.entries(this.table_definitions)) {
            // FIXME: Move to a separate method
            console.log(`Trying to create table ${table.name}`);
            console.log(JSON.stringify(table_ref, null, 2));
            console.log(JSON.stringify(table, null, 2));

            if (drop_tables) {
                console.log(`Dropping table ${table.name}!!`);
                await this._execute(
                    conn,
                    `DROP TABLE IF EXISTS ${table.name} ${drop_suffix}`,
                );
            }

            await this._execute(conn, this._build_create_table_sql(table));

            if (table.comment) {
                console.log(`Set the following comment on table ${table.name}: ${table.comment}`);
                await this._execute(
                    conn,
                    `COMMENT ON TABLE ${table.name} IS '${escape_sql_literal(table.comment)}'`,
                );
            }
            for (const col of table.columns) {
                if (col.comment) {
                    console.log(`Set the following comment on column ${col.name}: ${col.comment}`);
                    await this._execute(
                        conn,
                        `COMMENT ON COLUMN ${table.name}."${col.name}" IS '${escape_sql_literal(col.comment)}'`,
                    );
                }
            }
            console.log("Table created!");
        }
        // if (auto_commit) {
        //     conn.commit();
        // }
    }

    /**
     * Generates the DDL for every table without touching a database.
     *
     * @param include_comments - also emit `COMMENT ON` statements. Defaults to false.
     * @returns the statements joined with `;` and a blank line.
     */
    async generate_ddl(include_comments: boolean = false) {
        console.log(`Creating tables in the schema ${this.db_schema_name}`);
        console.log(Object.keys(this.table_definitions));

        const ddl: string[] = [];
        for (const [table_ref, table] of Object.entries(this.table_definitions)) {
            // FIXME: Move to a separate method
            console.log(`Trying to create table ${table.name}`);
            console.log(JSON.stringify(table_ref, null, 2));
            console.log(JSON.stringify(table, null, 2));

            ddl.push(this._build_create_table_sql(table));

            if (include_comments) {
                if (table.comment) {
                    ddl.push(`COMMENT ON TABLE ${table.name} IS '${escape_sql_literal(table.comment)}'`);
                }
                for (const col of table.columns) {
                    if (col.comment) {
                        console.log(`Set the following comment on column ${col.name}: ${col.comment}`);
                        ddl.push(
                            `COMMENT ON COLUMN ${table.name}."${col.name}" IS '${escape_sql_literal(col.comment)}'`,
                        );
                    }
                }
            }
            console.log("Table created!");
        }
        return ddl.join(';\n\n');
    }

    /**
     * Adds foreign keys between tables.
     *
     * @param conn - connection object.
     * @param auto_commit - accepted for compatibility; committing is currently disabled.
     */
    async create_links(conn: any, { auto_commit = true }: CreateLinksParams = {}) {
        for (const table of Object.values(this.table_definitions)) {
            const columns = table.columns as FKColumn[];
            for (const col of columns) {
                if (!col.is_fk) {
                    continue;
                }
                // Negative array indices do not count from the end in
                // JavaScript, so the suffix is derived by a helper.
                const fk_q = (
                    `ALTER TABLE ${table.name} ` +
                    `ADD CONSTRAINT fk_${fk_constraint_suffix(col.table_ref.name)} ` +
                    `FOREIGN KEY (${col.name}) ` +
                    `REFERENCES ${col.table_ref.name} (${col.table_ref.primary_key?.name}); `
                );
                await this._execute(conn, fk_q);
                // if (auto_commit) {
                //     conn.commit();
                // }
            }
        }
    }

    /**
     * Runs `ANALYZE` on each table.
     * See the Postgres documentation for Analyze:
     * https://www.postgresql.org/docs/9.1/static/sql-analyze.html
     *
     * @param conn - connection object.
     */
    async analyze(conn: any) {
        console.log("Analyzing tables...");
        for (const table of Object.values(this.table_definitions)) {
            console.log(`Launch analyze for ${table.name}`);
            await this._execute(conn, "ANALYZE " + table.name);
        }
    }
}

/**
 * Shorthand for JSONSchemaToDatabase(..., database_flavor='postgres')
 */
export class JSONSchemaToPostgres extends JSONSchemaToDatabase {
    constructor(schema: any, params: JSONSchemaToDatabaseParams = {}) {
        super(schema, { ...params, database_flavor: "postgres" });
    }
}

/**
 * Shorthand for JSONSchemaToDatabase(..., database_flavor='redshift')
 */
export class JSONSchemaToRedshift extends JSONSchemaToDatabase {
    constructor(schema: any, params: JSONSchemaToDatabaseParams = {}) {
        super(schema, { ...params, database_flavor: "redshift" });
    }
}