extract-mongo-schema
Version:
Extract (and visualize) the schema of a MongoDB database (including foreign keys)
225 lines (196 loc) • 8.15 kB
JavaScript
const { MongoClient, ObjectId } = require('mongodb');
/**
 * Opens a connection to MongoDB and returns the client together with the
 * database selected by the connection URL.
 *
 * @param {string} connectionURL - MongoDB connection string.
 * @param {string} [authSource] - passed through as the `authSource` client option.
 * @returns {Promise<{client: object, db: object}>} the connected client and `client.db()`.
 *   The promise rejects with the driver's error when the connection fails.
 */
const connect = async (connectionURL, authSource) => {
  const client = new MongoClient(connectionURL, { authSource, useNewUrlParser: true, useUnifiedTopology: true });
  // Use the promise form of connect(): throwing from inside a driver callback
  // would never reject the promise handed to the caller.
  await client.connect();
  return { client, db: client.db() };
};
/**
 * Connects to the database at `url`, samples documents from its collections
 * and builds a schema object keyed by collection name.
 *
 * @param {string} url - connection URL, forwarded to `connect`.
 * @param {object} opts - options; this function and its helpers read
 *   `authSource`, `collectionList`, `includeSystem`, `limit`, `raw`,
 *   `excludeFields`, `dontFollowFK` and `arrayList`.
 * @returns {Promise<object>} the `schema` object.
 */
const getSchema = async (url, opts) => {
// Open the connection; `client` is closed again before the schema is returned.
const { client, db } = await connect(url, opts.authSource);
// Fetch the collection info objects (each one carries a `name`).
const l = await db.listCollections();
const collectionInfos = await l.toArray();
// Result: collection name -> schema of that collection's fields.
const schema = {};
// Collection name -> { collection } handle, searched during foreign-key lookups.
const collections = {};
// Cache: stringified id -> { collectionName } where that id was found ('' when it was found nowhere).
const relations = {};
/**
 * Decides whether an id-like value points at a document of one of the known
 * collections and flags `field` accordingly.
 *
 * On a match the field gets `foreignKey = true` and `references = <collection>`
 * (and loses `key`); otherwise it gets `key = true`. The outcome is cached in
 * `relations` per stringified value so each value is looked up only once.
 *
 * @param {*} value - the id value; it is stringified with `toString()`.
 * @param {object} field - schema entry of the field, mutated in place.
 * @returns {Promise<void>}
 */
const findRelatedCollection = async (value, field) => {
  const valueToString = value.toString();

  const markForeignKey = (collectionName) => {
    delete field.key;
    field.foreignKey = true;
    field.references = collectionName;
  };
  const markPlainKey = () => {
    // A field already resolved to a foreign key (by another document) must not
    // be downgraded because this particular value matched nothing.
    if (!field.foreignKey) field.key = true;
  };

  const cached = relations[valueToString];
  if (cached) {
    if (Object.prototype.hasOwnProperty.call(collections, cached.collectionName)) {
      markForeignKey(cached.collectionName);
    } else {
      markPlainKey();
    }
    return;
  }

  const id = new ObjectId(valueToString);
  for (const collectionName in collections) {
    const related = await collections[collectionName].collection.findOne({ _id: id }, { projection: { _id: 1 } });
    if (related) {
      markForeignKey(collectionName);
      relations[valueToString] = { collectionName };
      // Stop at the first match: continuing would let later, non-matching
      // collections overwrite both the field flags and the cache entry.
      return;
    }
  }

  markPlainKey();
  relations[valueToString] = { collectionName: '' };
};
/**
 * Returns a short type name for a value: the `typeof` result for primitives
 * and functions, or the tag from `Object.prototype.toString` (for example
 * `Array`, `Date`, `Null`, `Object`) for anything `typeof` reports as `object`.
 *
 * @param {*} item - value to classify.
 * @returns {string} the type name.
 */
const setTypeName = (item) => {
  const primitiveName = typeof item;
  const rawName = primitiveName === 'object'
    ? Object.prototype.toString.call(item)
    : primitiveName;
  // Strip the "[object " prefix and the first closing bracket of the tag.
  return rawName.replace('[object ', '').replace(']', '');
};
/**
 * Merges the shape of one document (or nested object) into `docSchema`.
 *
 * For every field it counts the observed type under `types[typeName].frequency`,
 * marks `_id` as primary key, resolves other id-like values through
 * `findRelatedCollection`, recurses into nested objects and records the element
 * types of values whose type name is listed in `opts.arrayList`.
 *
 * `opts.excludeFields`, `opts.dontFollowFK` and `opts.dontFollowFK.__ANY__` are
 * optional; when absent nothing is excluded or ignored.
 *
 * @param {string} collectionName - collection the document belongs to.
 * @param {object} doc - document or nested object; id objects are replaced by
 *   their string form in place.
 * @param {object} docSchema - schema accumulator, mutated in place.
 * @returns {Promise<void>}
 */
const getDocSchema = async (collectionName, doc, docSchema) => {
  const excludedFields = opts.excludeFields || [];
  const dontFollowFK = opts.dontFollowFK || {};
  const ignoredEverywhere = dontFollowFK.__ANY__ || {};
  const ignoredHere = dontFollowFK[collectionName] || {};
  const isArrayType = (name) => Boolean(opts.arrayList) && opts.arrayList.indexOf(name) !== -1;

  for (const key in doc) {
    if (excludedFields.includes(key)) continue;

    if (!docSchema[key]) {
      docSchema[key] = { types: {} };
    }
    const field = docSchema[key];
    if (!field.types) {
      field.types = {};
    }

    let typeName = setTypeName(doc[key]);
    if (!field.types[typeName]) {
      field.types[typeName] = { frequency: 0 };
    }
    field.types[typeName].frequency++;

    // Id objects report themselves as 'Object'; from here on treat them as their string form.
    if (typeName === 'Object' && ObjectId.isValid(doc[key])) {
      typeName = 'string';
      doc[key] = doc[key].toString();
    }

    if (typeName === 'string' && ObjectId.isValid(doc[key])) {
      if (key === '_id') {
        field.primaryKey = true;
      } else {
        // Look the value up only if the field is not resolved yet and not ignored.
        const alreadyResolved = field.foreignKey && field.references;
        const ignored = ignoredEverywhere[key] || ignoredHere[key];
        if (!alreadyResolved && !ignored) {
          await findRelatedCollection(doc[key], field);
        }
      }
    }

    if (typeName === 'Object') {
      const objectInfo = field.types[typeName];
      if (!objectInfo.structure) {
        objectInfo.structure = {};
      }
      await getDocSchema(collectionName, doc[key], objectInfo.structure);
    }

    if (isArrayType(typeName)) {
      const arrayInfo = field.types[typeName];
      if (!arrayInfo.structure) {
        arrayInfo.structure = { types: {} };
      }
      if (!arrayInfo.structure.types) {
        arrayInfo.structure.types = {};
      }
      const elementTypes = arrayInfo.structure.types;

      for (let i = 0; i < doc[key].length; i++) {
        const elementTypeName = setTypeName(doc[key][i]);
        if (elementTypeName === 'Object') {
          // Object elements are merged into one shared structure instead of being counted.
          if (!elementTypes[elementTypeName]) {
            elementTypes[elementTypeName] = { structure: {} };
          }
          if (!elementTypes[elementTypeName].structure) {
            elementTypes[elementTypeName].structure = {};
          }
          await getDocSchema(collectionName, doc[key][i], elementTypes[elementTypeName].structure);
        } else {
          if (!elementTypes[elementTypeName]) {
            elementTypes[elementTypeName] = { frequency: 0 };
          }
          elementTypes[elementTypeName].frequency++;
        }
      }
    }
  }
};
/**
 * Converts the per-type counters of a field into relative frequencies and
 * stores the most frequent concrete type in `field.type`.
 *
 * `Null` and `undefined` are never chosen as the type and do not compete with
 * concrete types, so the result does not depend on the order of the keys in
 * `field.types`. `field.type` is left untouched when no concrete type exists.
 *
 * @param {object} field - schema entry with a `types` map, mutated in place.
 * @param {number} processed - number of processed documents (the divisor).
 * @returns {boolean} `false` when a `Null` type was seen, `true` otherwise.
 */
const setMostFrequentType = (field, processed) => {
  let highest = 0;
  let notNull = true;
  for (const typeName in field.types) {
    const info = field.types[typeName];
    info.frequency = info.frequency / processed;
    if (typeName === 'Null') {
      notNull = false;
    }
    const isPlaceholder = typeName === 'Null' || typeName === 'undefined';
    if (!isPlaceholder && info.frequency > highest) {
      highest = info.frequency;
      field.type = typeName;
    }
  }
  return notNull;
};
/**
 * Collapses the raw per-type statistics of every field in `docSchema` into a
 * final description: `type`, `required` and, for objects and arrays,
 * `structure`. The `types` map of each field is removed afterwards.
 *
 * Does nothing when `processed` is falsy (no documents were sampled).
 *
 * @param {object} docSchema - field name -> raw schema entry, mutated in place.
 * @param {number} processed - number of processed documents.
 * @returns {void}
 */
const mostFrequentType = (docSchema, processed) => {
  if (!processed) return;
  const isArrayType = (name) => Boolean(opts.arrayList) && opts.arrayList.indexOf(name) !== -1;

  for (const fieldName in docSchema) {
    const field = docSchema[fieldName];
    // When called on an array type entry the keys include the numeric
    // `frequency`; only object entries describe a field.
    if (!field || typeof field !== 'object') continue;

    let notNull = setMostFrequentType(field, processed);
    if (!field.type) {
      field.type = 'undefined';
      notNull = false;
    }
    const dataType = field.type;

    if (dataType === 'Object') {
      const nested = field.types[dataType].structure;
      mostFrequentType(nested, processed);
      field.structure = nested;
    }

    if (isArrayType(dataType)) {
      const arrayInfo = field.types[dataType];
      const elements = arrayInfo.structure;
      // Arrays nested inside arrays carry no element statistics; skip them
      // instead of dereferencing a missing structure.
      if (elements && elements.types) {
        if (Object.keys(elements.types)[0] === 'Object') {
          const objectStructure = elements.types.Object.structure;
          mostFrequentType(objectStructure, processed);
          elements.type = 'Object';
          elements.structure = objectStructure;
          delete elements.types;
        } else {
          // Treats the array entry as a one-field schema whose field is `structure`.
          mostFrequentType(arrayInfo, processed);
        }
      }
      if (elements !== undefined) {
        field.structure = elements;
      }
    }

    delete field.types;
    field.required = notNull;
  }
};
if (opts.collectionList !== null) {
for (let i = collectionInfos.length - 1; i >= 0; i--) {
if (opts.collectionList.indexOf(collectionInfos[i].name) === -1) {
collectionInfos.splice(i, 1);
}
}
}
if (!opts.includeSystem) {
for (let i = collectionInfos.length - 1; i >= 0; i--) {
if (collectionInfos[i].name.startsWith('system.')) {
collectionInfos.splice(i, 1);
}
}
}
await Promise.all(collectionInfos.map(async (collectionInfo, index) => {
collections[collectionInfo.name] = {};
schema[collectionInfo.name] = {};
collections[collectionInfo.name].collection = await db.collection(collectionInfo.name);
const docs = await collections[collectionInfo.name].collection.find({}, { limit: opts.limit }).toArray();
await Promise.all(docs.map(async doc => await getDocSchema(collectionInfo.name, doc, schema[collectionInfo.name])));
if (!opts.raw) mostFrequentType(schema[collectionInfo.name], docs.length);
}));
await client.close();
return schema;
};
/**
 * Public entry point: extracts the schema of the database at `url`.
 * Delegates to `getSchema` with the arguments unchanged.
 *
 * @param {string} url - connection URL.
 * @param {object} opts - extraction options, passed to `getSchema` as-is.
 * @returns {Promise<object>} whatever `getSchema` resolves with.
 */
const extractMongoSchema = async (url, opts) => {
  const extracted = await getSchema(url, opts);
  return extracted;
};
// Publish the entry point: attach it to `this` when no CommonJS `module.exports`
// is available, otherwise export it from the module.
if (typeof module === 'undefined' || !module.exports) {
  this.extractMongoSchema = extractMongoSchema;
} else {
  module.exports.extractMongoSchema = extractMongoSchema;
}