/*
 * mongoose-database-schema
 * Version:
 * MongoDB database documentation generator with table schemas and relationships
 * 148 lines (123 loc) • 4.59 kB
 * JavaScript
 */
/**
 * Infers a descriptive schema for a MongoDB collection by sampling documents
 * and recording, for every field path, how often it occurs and which value
 * types were observed.
 *
 * Field paths use `parent.child` for nested objects, `field[]` for the first
 * element of an array, and `parent.*` for objects whose keys look like
 * ObjectId strings (i.e. the object is used as a dynamic map).
 */
class SchemaAnalyzer {
  /**
   * @param {object} db - Database handle. Only `db.collection(name)` is used,
   *   and the returned collection must provide `aggregate`, `countDocuments`
   *   and `indexes`.
   */
  constructor(db) {
    this.db = db;
  }

  /**
   * Samples a collection and describes its fields and indexes.
   *
   * @param {string} collectionName - Name passed to `db.collection`.
   * @param {number} [sampleSize=100] - Size handed to the `$sample` stage.
   * @returns {Promise<object>} `{ collectionName, documentCount, sampleSize,
   *   fields, indexes }`. When the sample is empty the result is
   *   `{ collectionName, documentCount: 0, sampleSize: 0, fields: {} }`;
   *   `indexes` is deliberately not queried in that case so that the call
   *   issues no further database requests.
   */
  async analyzeCollection(collectionName, sampleSize = 100) {
    const collection = this.db.collection(collectionName);
    const documents = await collection
      .aggregate([{ $sample: { size: sampleSize } }])
      .toArray();

    if (documents.length === 0) {
      return {
        collectionName,
        documentCount: 0,
        sampleSize: 0,
        fields: {},
      };
    }

    const fields = this.buildSchema(documents);

    // The count and the index listing do not depend on each other.
    const [documentCount, indexes] = await Promise.all([
      collection.countDocuments(),
      this.getIndexes(collectionName),
    ]);

    return {
      collectionName,
      documentCount,
      sampleSize: documents.length,
      fields,
      indexes,
    };
  }

  /**
   * Builds the per-field summary for a set of documents.
   *
   * @param {object[]} documents - Sampled documents.
   * @returns {object} Map of field path to
   *   `{ frequency, required, types, nullable }`; dynamic (`.*`) entries also
   *   carry a `description` and are never `required`.
   */
  buildSchema(documents) {
    const schema = {};
    for (const doc of documents) {
      this.analyzeDocument(doc, schema, '');
    }

    const total = documents.length;
    // Object.keys keeps the loop on own properties only, so a collected field
    // called "__proto__" cannot drag prototype members into the iteration.
    for (const field of Object.keys(schema)) {
      const fieldInfo = schema[field];
      const percent = (fieldInfo.count / total) * 100;

      if (field.endsWith('.*')) {
        // Capped defensively: distinct documents shapes can map to one path.
        fieldInfo.frequency = `${Math.min(100, percent).toFixed(2)}%`;
        fieldInfo.required = false; // Dynamic fields are never required
        fieldInfo.description = 'Dynamic object with variable keys';
      } else {
        fieldInfo.frequency = `${percent.toFixed(2)}%`;
        fieldInfo.required = fieldInfo.count === total;
      }

      fieldInfo.types = [...fieldInfo.typeSet];
      fieldInfo.nullable = fieldInfo.typeSet.has('null');

      // Strip the working state - only schema metadata is returned.
      delete fieldInfo.typeSet;
      delete fieldInfo.count;
      delete fieldInfo.examples;
    }
    return schema;
  }

  /**
   * Returns the accumulator entry for `key`, creating it when missing.
   *
   * The entry is always created as an own data property. A plain
   * `schema[key]` lookup would find inherited members for keys such as
   * "constructor" or "toString", and a plain assignment to "__proto__" would
   * replace the prototype instead of storing the entry.
   *
   * @param {object} schema - Accumulator being filled.
   * @param {string} key - Field path.
   * @returns {{count: number, typeSet: Set<string>}} The entry for `key`.
   */
  ensureFieldEntry(schema, key) {
    if (!Object.prototype.hasOwnProperty.call(schema, key)) {
      Object.defineProperty(schema, key, {
        value: { count: 0, typeSet: new Set() },
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    return schema[key];
  }

  /**
   * Records every field of `obj` into `schema`, recursing into nested objects
   * and into the first element of arrays.
   *
   * @param {object} obj - Document or sub-document to inspect.
   * @param {object} schema - Accumulator mutated in place.
   * @param {string} prefix - Path of `obj` within the document ('' at root).
   * @param {number} [depth=0] - Current nesting level; levels above 2 are
   *   not inspected.
   * @returns {void}
   */
  analyzeDocument(obj, schema, prefix, depth = 0) {
    // Limit nesting depth to avoid showing too much detail
    if (depth > 2) return;
    if (obj === null || typeof obj !== 'object') return;

    // An object counts once towards its ".*" entry no matter how many
    // id-like keys it holds; otherwise the frequency would measure keys
    // rather than documents.
    let dynamicCounted = false;

    for (const key of Object.keys(obj)) {
      // Keys that look like ObjectIds are data, not field names: report the
      // parent as a dynamic object instead of listing each key.
      if (this.isObjectIdLikeKey(key)) {
        if (!dynamicCounted) {
          const dynamicEntry = this.ensureFieldEntry(schema, `${prefix || 'root'}.*`);
          dynamicEntry.count++;
          dynamicEntry.typeSet.add('object');
          dynamicCounted = true;
        }
        continue;
      }

      const value = obj[key];
      const type = this.getValueType(value);

      // Skip Buffer and function fields entirely as they're not useful for schema documentation
      if (type === 'buffer' || type === 'function') {
        continue;
      }

      const fullKey = prefix ? `${prefix}.${key}` : key;
      const entry = this.ensureFieldEntry(schema, fullKey);
      entry.count++;
      entry.typeSet.add(type);

      if (type === 'object') {
        this.analyzeDocument(value, schema, fullKey, depth + 1);
      } else if (type === 'array' && value.length > 0) {
        // Only plain sub-documents are walked; nested arrays, Buffers, dates
        // and ObjectIds would otherwise surface their indices/internals as
        // fields.
        const firstElement = value[0];
        if (this.getValueType(firstElement) === 'object') {
          this.analyzeDocument(firstElement, schema, `${fullKey}[]`, depth + 1);
        }
      }
    }
  }

  /**
   * @param {string} key - Property name to test.
   * @returns {boolean} True when `key` is exactly 24 hexadecimal characters.
   */
  isObjectIdLikeKey(key) {
    return /^[0-9a-fA-F]{24}$/.test(key);
  }

  /**
   * Tells whether `value` is an ObjectId instance, judged by its `_bsontype`
   * tag or, failing that, by its constructor being named `ObjectId`.
   * Never throws, even for objects without a usable `constructor`.
   *
   * @param {*} value - Value to test.
   * @returns {boolean}
   */
  isObjectId(value) {
    if (value === null || typeof value !== 'object') return false;
    if (value._bsontype === 'ObjectId' || value._bsontype === 'ObjectID') {
      return true;
    }
    const ctor = value.constructor;
    return typeof ctor === 'function' && ctor.name === 'ObjectId';
  }

  /**
   * Classifies a value for the schema report.
   *
   * @param {*} value - Any document value.
   * @returns {string} 'null', 'array', 'date', 'buffer', 'function',
   *   'objectId', or the result of `typeof value`.
   */
  getValueType(value) {
    if (value === null) return 'null';
    if (Array.isArray(value)) return 'array';
    if (value instanceof Date) return 'date';
    if (Buffer.isBuffer(value)) return 'buffer';
    if (typeof value === 'function') return 'function';
    if (this.isObjectId(value)) return 'objectId';
    // NOTE(review): other wrapper objects fall through to 'object' here and
    // are then walked as sub-documents - confirm whether that is intended.
    return typeof value;
  }

  /**
   * Produces a compact, printable stand-in for a value.
   *
   * @param {*} value - Any document value.
   * @returns {*} `null` for null, `"[N items]"` for arrays, an ISO string for
   *   dates, the string form for ObjectIds, strings longer than 50 characters
   *   cut to 50 plus "...", and every other value unchanged.
   */
  getExample(value) {
    if (value === null) return null;
    if (Array.isArray(value)) return `[${value.length} items]`;
    if (value instanceof Date) return value.toISOString();
    if (this.isObjectId(value)) return value.toString();
    if (typeof value === 'string' && value.length > 50) {
      return `${value.substring(0, 50)}...`;
    }
    return value;
  }

  /**
   * Lists the indexes of a collection in a reduced form.
   *
   * @param {string} collectionName - Name passed to `db.collection`.
   * @returns {Promise<Array<{name: *, keys: *, unique: *, sparse: *}>>}
   *   One entry per index; `unique` and `sparse` fall back to `false` when
   *   the index description has no truthy value for them.
   */
  async getIndexes(collectionName) {
    const collection = this.db.collection(collectionName);
    const indexes = await collection.indexes();
    return indexes.map((index) => ({
      name: index.name,
      keys: index.key,
      unique: index.unique || false,
      sparse: index.sparse || false,
    }));
  }
}
// CommonJS export: the SchemaAnalyzer class itself is the module's export value.
module.exports = SchemaAnalyzer;