UNPKG

mongo2crate

Version:

Sync MongoDB to CrateDB and Convert JSON schema to SQL DDL

166 lines (165 loc) 6.2 kB
import { getDupes } from 'dupes-of-hazard';
import lodash from 'lodash';
import _ from 'lodash/fp.js';
import makeError from 'make-error';
import { minimatch } from 'minimatch';
import { traverseSchema } from 'mongochangestream';
import util from 'node:util';
import { map, walk } from 'obj-walker';
import { arrayStartsWith } from './util.js';

/** Error type thrown for invalid rename options and duplicate paths. */
export const Mongo2CrateError = makeError('Mongo2CrateError');

/** Lookup table from BSON type name to the SQL column type emitted in DDL. */
const bsonTypeToSQL = {
  number: 'BIGINT', // 64-bit
  double: 'DOUBLE PRECISION', // 64-bit
  int: 'INTEGER',
  long: 'BIGINT',
  decimal: 'DOUBLE PRECISION',
  objectId: 'TEXT',
  string: 'TEXT',
  date: 'TIMESTAMP WITH TIME ZONE',
  timestamp: 'TIMESTAMP WITH TIME ZONE',
  bool: 'BOOLEAN',
  object: 'OBJECT(IGNORED)',
  null: 'TEXT',
  undefined: 'TEXT',
  regex: 'TEXT',
  symbol: 'TEXT',
  javascript: 'TEXT',
};

/**
 * Look up the SQL type for a BSON type.
 *
 * When `bsonType` is an array only its first entry is considered. A missing
 * `bsonType` resolves through the `undefined` key of the table (TEXT).
 *
 * NOTE(review): a type name that is not in the table returns `undefined`,
 * which is then interpolated into the DDL as the literal text "undefined".
 */
const convertType = (bsonType) => {
  if (Array.isArray(bsonType)) {
    bsonType = bsonType[0];
  }
  return bsonTypeToSQL[bsonType];
};

/** Lookup table from a node flag to the SQL column modifier it produces. */
const flagToSQL = {
  indexOff: 'INDEX OFF',
  columnStoreOff: 'STORAGE WITH (columnstore = false)',
  notNull: 'NOT NULL',
};

/**
 * Render a list of flags as space-separated SQL modifiers with a leading
 * space, or an empty string when there are no flags.
 */
const flagsToSql = (flags) =>
  flags && flags.length
    ? ' ' + flags.map((flag) => flagToSQL[flag]).join(' ')
    : '';

/** Return ',' when `cond` is truthy, otherwise an empty string. */
const renderCommaIf = (cond) => (cond ? ',' : '');

/** Indentation unit added for each nesting level of the generated DDL. */
const padding = ' ';

/** Render the table-level column policy clause. */
const columnPolicy = (policy) => ` WITH (column_policy = '${policy}')`;

/**
 * Decide the column policy for an object (or root) node: 'strict' only when
 * strict mode is on AND the schema sets `additionalProperties: false`,
 * otherwise 'dynamic'.
 */
const getObjectPolicy = (node, strictMode) => {
  const noAdditionalProps = node.val.additionalProperties === false;
  if (strictMode && noAdditionalProps) {
    return 'strict';
  }
  return 'dynamic';
};

/**
 * Render a flat, ordered list of schema nodes as DDL text.
 *
 * The head of `nodes` is consumed on each iteration:
 * - a root node produces the `CREATE TABLE` wrapper (with a `%s` placeholder
 *   for the table name) around the rendering of all remaining nodes;
 * - a leaf node produces one column line;
 * - any other node (array or object) produces a nested block built by
 *   recursing over its descendants.
 *
 * @param nodes Remaining nodes to render.
 * @param strictMode Passed through to `getObjectPolicy`.
 * @param spacing Indentation prefix for the current nesting level.
 * @returns The rendered DDL fragment.
 */
const _convertSchema = (nodes, strictMode = false, spacing = '') => {
  let returnVal = '';
  while (true) {
    const node = nodes[0];
    if (!node) {
      return returnVal;
    }
    const isPrimaryKey = _.equals(node.path, ['id']);
    // `_items` nodes describe array elements, which have no column name.
    const field = node.key === '_items' ? '' : `"${node.key}" `;
    // Create table
    if (node.isRoot) {
      return (
        'CREATE TABLE IF NOT EXISTS %s (\n' +
        _convertSchema(nodes.slice(1), strictMode, padding) +
        ')' +
        columnPolicy(getObjectPolicy(node, strictMode))
      );
    }
    // Scalar fields, including objects with no defined fields
    if (node.isLeaf) {
      // A comma is needed whenever another sibling follows this one.
      const comma = renderCommaIf(nodes.length > 1);
      const sqlType = convertType(node.val.bsonType);
      const primary = isPrimaryKey ? ' PRIMARY KEY' : '';
      const modifiers = flagsToSql(node.val.flags);
      returnVal += `${spacing}${field}${sqlType}${primary}${modifiers}${comma}\n`;
      nodes = nodes.slice(1);
    }
    // Arrays and objects
    else {
      // Last node that `arrayStartsWith` reports as sharing this node's path
      // (presumably "path begins with node.path" - defined in ./util.js).
      // Everything between the head and that index is rendered as children.
      const index = _.findLastIndex(
        (n) => arrayStartsWith(n.path, node.path),
        nodes
      );
      const childNodes = nodes.slice(1, index + 1);
      const newSpacing = spacing + padding;
      // A comma is needed when nodes remain beyond this one and its children.
      const comma = renderCommaIf(nodes.length - childNodes.length > 1);
      const sqlType =
        node.val.bsonType === 'array'
          ? 'ARRAY'
          : `OBJECT(${getObjectPolicy(node, strictMode).toUpperCase()}) AS`;
      returnVal +=
        `${spacing}${field}${sqlType} (\n` +
        _convertSchema(childNodes, strictMode, newSpacing) +
        `${spacing})${comma}\n`;
      nodes = nodes.slice(index + 1);
    }
  }
};

/**
 * Return `nodes` without those for which `arrayStartsWith` matches the node
 * path against any of the `omit` paths (each converted with `_.toPath`).
 */
const omitNodes = (nodes, omit) =>
  _.remove(
    ({ path }) =>
      _.find((omitPath) => arrayStartsWith(path, _.toPath(omitPath)), omit),
    nodes
  );

/**
 * Applies matching overrides to each node.
 *
 * If multiple overrides match the node's path (e.g. `*` and `foo.*` both match
 * the path `foo.bar`), they are applied in sequence, such that the output of
 * each override is passed as input to the next.
 *
 * Nodes are updated in place. The override object itself is spread over the
 * (optionally mapped) node value, so its own properties take precedence.
 */
const handleOverrides = (nodes, overrides) => {
  for (const node of nodes) {
    const stringPath = node.path.join('.');
    for (const override of overrides) {
      const { path, mapper } = override;
      if (minimatch(stringPath, path)) {
        lodash.update(node, 'val', (obj) => ({
          ...(mapper ? mapper(obj, stringPath) : obj),
          ...override,
        }));
      }
    }
  }
};

/**
 * Modify path and key for relevant nodes.
 *
 * `rename` maps a dotted source path to a dotted destination path. Nodes are
 * mutated in place.
 *
 * @throws {Mongo2CrateError} When a rename fails the prefix check, or when the
 * renames result in duplicate paths.
 */
const handleRename = (nodes, rename) => {
  for (const [dottedPath, newDottedPath] of Object.entries(rename)) {
    const oldPath = dottedPath.split('.');
    const newPath = newDottedPath.split('.');
    if (!arrayStartsWith(oldPath, newPath.slice(0, -1))) {
      throw new Mongo2CrateError(
        `Rename path prefix does not match: ${dottedPath}`
      );
    }
    for (const node of nodes) {
      if (arrayStartsWith(node.path, oldPath)) {
        node.path.splice(0, oldPath.length, ...newPath);
        // Array element nodes carry the marker key `_items` while their path
        // has `_items` stripped. The marker must survive a rename: both the
        // duplicate check below and the DDL renderer rely on it, so replacing
        // it with the last path segment would break renames of arrays (or of
        // any ancestor of an array).
        if (node.key !== '_items') {
          node.key = node.path.at(-1);
        }
      }
    }
  }
  const paths = nodes
    // Remove _items nodes since the paths are always duplicated
    .filter((node) => node.key !== '_items')
    .map((node) => node.path);
  const dupes = getDupes(paths);
  if (dupes.size) {
    throw new Mongo2CrateError(
      `Duplicate paths found: ${Array.from(dupes).join(', ')}`
    );
  }
};

/** Return a copy of a node whose `path` no longer contains `_items` entries. */
const cleanupPath = _.update('path', _.pull('_items'));

/**
 * Convert MongoDB JSON schema to CrateDB table DDL.
 *
 * There are options that allow you to preprocess nodes, omit fields, rename
 * fields, and change the BSON type for fields (e.g. when a more specific
 * numeric type is needed). @see {@link ConvertOptions} for details.
 *
 * Processing order: `mapSchema`, walk, `omit`, `rename` (always including
 * `_id` -> `id`), `overrides`, then rendering.
 *
 * @param jsonSchema MongoDB JSON schema to convert.
 * @param qualifiedName Table name inserted into the `CREATE TABLE` statement.
 * @returns The DDL string.
 */
export const convertSchema = (
  jsonSchema,
  qualifiedName,
  { mapSchema, omit, rename, overrides, strictMode } = {}
) => {
  if (mapSchema) {
    jsonSchema = map(jsonSchema, mapSchema);
  }
  let nodes = walk(jsonSchema, { traverse: traverseSchema }).map(cleanupPath);
  if (omit) {
    nodes = omitNodes(nodes, omit);
  }
  handleRename(nodes, { ...rename, _id: 'id' });
  if (overrides) {
    handleOverrides(nodes, overrides);
  }
  const sqlSchema = _convertSchema(nodes, strictMode);
  // Substitute only the first `%s` (emitted by the root branch right after
  // `CREATE TABLE IF NOT EXISTS`). Passing the whole DDL through util.format
  // would also rewrite `%%` inside field names to `%`. A replacer function is
  // used so `$` sequences in the table name are not treated as patterns.
  return sqlSchema.replace('%s', () => util.format('%s', qualifiedName));
};