/*
 * mongo2crate
 * Version:
 * Sync MongoDB to CrateDB and convert JSON schema to SQL DDL
 * 166 lines (165 loc) • 6.2 kB
 * JavaScript
 */
import { getDupes } from 'dupes-of-hazard';
import lodash from 'lodash';
import _ from 'lodash/fp.js';
import makeError from 'make-error';
import { minimatch } from 'minimatch';
import { traverseSchema } from 'mongochangestream';
import util from 'node:util';
import { map, walk } from 'obj-walker';
import { arrayStartsWith } from './util.js';
/**
 * Error type thrown by this module, e.g. when a rename's path prefix does
 * not match or when renaming produces duplicate paths.
 */
export const Mongo2CrateError = makeError('Mongo2CrateError');
/**
 * Lookup table from a BSON type name (as found in a schema node's
 * `bsonType`) to the SQL column type written into the generated DDL.
 */
const bsonTypeToSQL = {
  // Numeric types
  int: 'INTEGER',
  long: 'BIGINT',
  number: 'BIGINT', // 64-bit
  double: 'DOUBLE PRECISION', // 64-bit
  decimal: 'DOUBLE PRECISION',

  // Date/time types
  date: 'TIMESTAMP WITH TIME ZONE',
  timestamp: 'TIMESTAMP WITH TIME ZONE',

  // Boolean
  bool: 'BOOLEAN',

  // Objects that reach the lookup as leaves (no defined fields)
  object: 'OBJECT(IGNORED)',

  // Everything else is stored as text
  objectId: 'TEXT',
  string: 'TEXT',
  null: 'TEXT',
  undefined: 'TEXT',
  regex: 'TEXT',
  symbol: 'TEXT',
  javascript: 'TEXT',
};
/**
 * Resolve the SQL column type for a schema node's `bsonType`.
 *
 * When `bsonType` is an array, only its first entry is looked up.
 * A type with no entry in `bsonTypeToSQL` resolves to `undefined`.
 *
 * @param {string | string[]} bsonType - BSON type name, or a list of them.
 * @returns {string | undefined} The matching SQL type, if any.
 */
const convertType = (bsonType) => {
  const lookupKey = Array.isArray(bsonType) ? bsonType[0] : bsonType;
  return bsonTypeToSQL[lookupKey];
};
/**
 * Lookup table from a column flag name (an entry of a schema node's
 * `flags` array) to the SQL clause appended after the column type.
 */
const flagToSQL = {
  notNull: 'NOT NULL',
  indexOff: 'INDEX OFF',
  columnStoreOff: 'STORAGE WITH (columnstore = false)',
};
/**
 * Render a list of column flags as SQL clauses.
 *
 * @param {string[] | undefined} flags - Flag names to look up in `flagToSQL`.
 * @returns {string} The clauses joined by single spaces with one leading
 *   space, or an empty string when there are no flags.
 */
const flagsToSql = (flags) => {
  if (!flags || !flags.length) {
    return '';
  }
  const clauses = flags.map((flag) => flagToSQL[flag]);
  return ` ${clauses.join(' ')}`;
};
/**
 * Return a comma when `cond` is truthy, otherwise an empty string.
 * Used to separate a column definition from the one that follows it.
 */
const renderCommaIf = (cond) => {
  if (cond) {
    return ',';
  }
  return '';
};
// Indentation unit for the generated DDL: one copy is added to the line
// prefix for each level of nesting.
const padding = ' ';
/**
 * Build the table-level `WITH (column_policy = ...)` clause.
 *
 * @param {string} policy - Policy name inserted between single quotes.
 * @returns {string} The clause, including its leading space.
 */
const columnPolicy = (policy) => {
  const setting = `column_policy = '${policy}'`;
  return ` WITH (${setting})`;
};
/**
 * Choose the column policy for an object (or root) schema node.
 *
 * The result is 'strict' only when strict mode is on AND the node's schema
 * sets `additionalProperties` to exactly `false`; otherwise it is 'dynamic'.
 *
 * @param {{ val: { additionalProperties?: unknown } }} node - Schema node.
 * @param {boolean} [strictMode] - Whether strict policies may be emitted.
 * @returns {'strict' | 'dynamic'}
 */
const getObjectPolicy = (node, strictMode) => {
  const forbidsExtraProps = node.val.additionalProperties === false;
  return strictMode && forbidsExtraProps ? 'strict' : 'dynamic';
};
/**
 * Render a flat list of schema nodes as CrateDB DDL text.
 *
 * The list is consumed front to back. A node's descendants are taken to be
 * the nodes that follow it, up to the last node whose path starts with the
 * node's own path; they are rendered by a recursive call with deeper
 * indentation.
 *
 * The table name is NOT filled in here: the root node produces a
 * `CREATE TABLE IF NOT EXISTS %s (...)` statement whose `%s` placeholder is
 * left for the caller to substitute.
 *
 * @param {object[]} nodes - Nodes with `key`, `path`, `val`, `isLeaf`, `isRoot`.
 * @param {boolean} [strictMode=false] - Passed through to `getObjectPolicy`.
 * @param {string} [spacing=''] - Indentation prefix for the current level.
 * @returns {string} The rendered DDL fragment ('' for an empty list).
 */
const _convertSchema = (nodes, strictMode = false, spacing = '') => {
  const rendered = [];
  let remaining = nodes;

  for (let current = remaining[0]; current; current = remaining[0]) {
    // Root node: wrap every other node in the CREATE TABLE statement.
    if (current.isRoot) {
      const body = _convertSchema(remaining.slice(1), strictMode, padding);
      const policy = columnPolicy(getObjectPolicy(current, strictMode));
      return `CREATE TABLE IF NOT EXISTS %s (\n${body})${policy}`;
    }

    // Array item nodes carry no column name of their own.
    const columnName = current.key === '_items' ? '' : `"${current.key}" `;

    // Scalar fields, including objects with no defined fields
    if (current.isLeaf) {
      const separator = renderCommaIf(remaining.length > 1);
      const sqlType = convertType(current.val.bsonType);
      const primary = _.equals(current.path, ['id']) ? ' PRIMARY KEY' : '';
      const modifiers = flagsToSql(current.val.flags);
      rendered.push(
        `${spacing}${columnName}${sqlType}${primary}${modifiers}${separator}\n`,
      );
      remaining = remaining.slice(1);
      continue;
    }

    // Arrays and objects: render the descendants one level deeper.
    const lastDescendantIndex = _.findLastIndex(
      (candidate) => arrayStartsWith(candidate.path, current.path),
      remaining,
    );
    const descendants = remaining.slice(1, lastDescendantIndex + 1);
    // A comma is needed only when siblings follow this node's subtree.
    const separator = renderCommaIf(remaining.length - descendants.length > 1);
    const opener =
      current.val.bsonType === 'array'
        ? 'ARRAY'
        : `OBJECT(${getObjectPolicy(current, strictMode).toUpperCase()}) AS`;
    const nested = _convertSchema(descendants, strictMode, spacing + padding);
    rendered.push(
      `${spacing}${columnName}${opener} (\n${nested}${spacing})${separator}\n`,
    );
    remaining = remaining.slice(lastDescendantIndex + 1);
  }

  return rendered.join('');
};
/**
 * Drop every node whose path starts with one of the `omit` paths.
 *
 * @param {object[]} nodes - Nodes carrying a `path` array.
 * @param {string[]} omit - Paths to omit; each is converted with `_.toPath`.
 * @returns {object[]} The result of `_.remove` for the omit predicate.
 */
const omitNodes = (nodes, omit) => {
  // Truthy when some omit path is a prefix of the node's path.
  const matchesOmitPath = ({ path }) =>
    _.find((omitPath) => arrayStartsWith(path, _.toPath(omitPath)), omit);
  return _.remove(matchesOmitPath, nodes);
};
/**
 * Applies matching overrides to each node.
 *
 * If multiple overrides match the node's path (e.g. `*` and `foo.*` both match
 * the path `foo.bar`), they are applied in sequence, such that the output of
 * each override is passed as input to the next.
 *
 * For a matching override, the node's `val` is replaced by the result of
 * `mapper(val, dottedPath)` (or `val` itself when there is no mapper), with
 * the override's remaining fields merged on top. The override's own `path`
 * and `mapper` are control fields and are not copied into `val`.
 *
 * @param {object[]} nodes - Nodes to update in place; each has `path` and `val`.
 * @param {object[]} overrides - Overrides with a `path` pattern (matched with
 *   `minimatch` against the dot-joined node path), an optional `mapper`, and
 *   any other fields to merge into the node's `val`.
 */
const handleOverrides = (nodes, overrides) => {
  for (const node of nodes) {
    const stringPath = node.path.join('.');
    for (const { path, mapper, ...fields } of overrides) {
      if (!minimatch(stringPath, path)) {
        continue;
      }
      lodash.update(node, 'val', (obj) => ({
        ...(mapper ? mapper(obj, stringPath) : obj),
        // Only the payload fields; `path` and `mapper` stay out of the schema.
        ...fields,
      }));
    }
  }
};
/**
 * Modify path and key for relevant nodes.
 *
 * Each entry of `rename` maps a dotted source path to a dotted target path.
 * Every node whose path starts with the source path has that prefix replaced
 * by the target path, and its `key` is updated to the last segment of the
 * new path. Array item nodes (key `_items`) get the new path but keep their
 * `_items` key: that key is what marks them as items for both the duplicate
 * check below and the DDL renderer.
 *
 * Nodes are mutated in place.
 *
 * @param {object[]} nodes - Nodes with `key` and a mutable `path` array.
 * @param {Record<string, string>} rename - Dotted old path -> dotted new path.
 * @throws {Mongo2CrateError} If the new path's parent is not a prefix of the
 *   old path, or if the renamed nodes contain duplicate paths.
 */
const handleRename = (nodes, rename) => {
  for (const [dottedPath, dottedTarget] of Object.entries(rename)) {
    const oldPath = dottedPath.split('.');
    const newPath = dottedTarget.split('.');
    if (!arrayStartsWith(oldPath, newPath.slice(0, -1))) {
      throw new Mongo2CrateError(`Rename path prefix does not match: ${dottedPath}`);
    }
    for (const node of nodes) {
      if (!arrayStartsWith(node.path, oldPath)) {
        continue;
      }
      node.path.splice(0, oldPath.length, ...newPath);
      // Keep the `_items` marker; overwriting it would turn the item node
      // into a second column with the same path as its parent array.
      if (node.key !== '_items') {
        node.key = node.path.at(-1);
      }
    }
  }
  const paths = nodes
    // Remove _items nodes since the paths are always duplicated
    .filter((node) => node.key !== '_items')
    .map((node) => node.path);
  const dupes = getDupes(paths);
  if (dupes.size) {
    throw new Mongo2CrateError(`Duplicate paths found: ${Array.from(dupes).join(', ')}`);
  }
};
/**
 * Node transform that runs lodash/fp `pull('_items')` over a node's `path`,
 * i.e. drops the `_items` segments from it. As a result an `_items` node
 * ends up with the same path as its parent (which is why `handleRename`
 * excludes `_items` nodes from its duplicate-path check).
 */
const cleanupPath = _.update('path', _.pull('_items'));
/**
 * Convert MongoDB JSON schema to CrateDB table DDL.
 *
 * There are options that allow you to preprocess nodes, omit fields, rename
 * fields, and change the BSON type for fields (e.g. when a more specific
 * numeric type is needed). @see {@link ConvertOptions} for details.
 *
 * Processing order: `mapSchema` (whole-schema preprocessing), walk the schema
 * into a flat node list, `omit`, `rename` (with `_id` always renamed to
 * `id`), `overrides`, then DDL rendering.
 *
 * @param {object} jsonSchema - MongoDB JSON schema to convert.
 * @param {string} qualifiedName - Table name inserted verbatim after
 *   `CREATE TABLE IF NOT EXISTS`.
 * @param {object} [options]
 * @param {Function} [options.mapSchema] - Mapper applied to the schema via `map`.
 * @param {string[]} [options.omit] - Paths (and their descendants) to drop.
 * @param {Record<string, string>} [options.rename] - Dotted old -> new paths.
 * @param {object[]} [options.overrides] - Per-path overrides.
 * @param {boolean} [options.strictMode] - Allow 'strict' column policies.
 * @returns {string} The CREATE TABLE statement.
 * @throws {Mongo2CrateError} Propagated from the rename step.
 */
export const convertSchema = (jsonSchema, qualifiedName, { mapSchema, omit, rename, overrides, strictMode } = {}) => {
  const schema = mapSchema ? map(jsonSchema, mapSchema) : jsonSchema;
  let nodes = walk(schema, { traverse: traverseSchema }).map(cleanupPath);
  if (omit) {
    nodes = omitNodes(nodes, omit);
  }
  handleRename(nodes, { ...rename, _id: 'id' });
  if (overrides) {
    handleOverrides(nodes, overrides);
  }
  const sqlSchema = _convertSchema(nodes, strictMode);
  // Fill in only the table-name placeholder (the first `%s`). The rest of the
  // DDL is data, not a format string: running it through a printf-style
  // formatter would collapse `%%` inside column names. A function replacer
  // keeps `$` sequences in the table name literal as well.
  return sqlSchema.replace('%s', () => qualifiedName);
};