mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
88 lines • 4.5 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateAnnotatedDatabaseInfo = exports.DatabaseInfoSchema = void 0;
const zod_1 = require("zod");
const generateAnnotatedCollectionSchema_1 = require("./generateAnnotatedCollectionSchema");
const generateHighLevelDbDescriptions_1 = require("./generateHighLevelDbDescriptions");
const getDatabaseMetadata_1 = require("./getDatabaseMetadata");
const braintrust_1 = require("braintrust");
exports.DatabaseInfoSchema = zod_1.z.object({
name: zod_1.z.string().describe("Name of the database"),
description: zod_1.z.string().describe("Brief description of the database"),
latestDate: zod_1.z.date().describe("Latest date in the database"),
collections: zod_1.z.array(zod_1.z.object({
name: zod_1.z.string(),
description: zod_1.z.string(),
schema: zod_1.z.any(),
examples: zod_1.z.array(zod_1.z.any()),
indexes: zod_1.z
.array(zod_1.z.object({
description: zod_1.z.string().optional(),
name: zod_1.z.string(),
key: zod_1.z.any(),
unique: zod_1.z.boolean().optional(),
v: zod_1.z.number().optional(),
background: zod_1.z.boolean().optional(),
"2dsphereIndexVersion": zod_1.z.number().optional(),
}))
.describe("Indexes on the collection."),
})),
});
/**
Generate LLM-annotated information about a MongoDB database.

Steps:
1. Load the raw database metadata with `getDatabaseMetadata`.
2. Ask the LLM for a high-level description of the database and of each
   collection.
3. For each collection, one at a time, ask the LLM for an annotated schema and
   index descriptions, and merge them into the result.

Steps 2 and 3 run inside a Braintrust `traced` span named
"generateAnnotatedDatabaseInfo".

The raw metadata object is never modified: index descriptions are written onto
copies of the index objects, so every per-collection LLM call receives the same
unannotated `databaseMetadata`.

@param {object} params
@param {object} params.mongoDb
@param params.mongoDb.mongoClient - Forwarded to `getDatabaseMetadata`.
@param {string} params.mongoDb.databaseName - Forwarded to `getDatabaseMetadata`
  and used as `name` of the result.
@param {number} [params.mongoDb.numSamplesPerCollection=2] - Forwarded to
  `getDatabaseMetadata`.
@param {Date} [params.latestDate] - Forwarded to `getDatabaseMetadata` and
  copied onto the result. Defaults to the time of the call.
@param params.llmOptions - Passed through to both LLM-backed generators.
@param params.openAiClient - Client used to construct both generators.
@returns {Promise<object>} Object with `name`, `description`, `latestDate` and
  `collections` (each with `name`, `description`, `schema`, `examples`,
  `indexes`).
 */
async function generateAnnotatedDatabaseInfo({ mongoDb: { mongoClient, databaseName, numSamplesPerCollection = 2 }, latestDate = new Date(), llmOptions, openAiClient, }) {
    // Get raw database metadata
    const databaseMetadata = await (0, getDatabaseMetadata_1.getDatabaseMetadata)({
        mongoClient,
        databaseName,
        numSamplesPerCollection,
        latestDate,
    });
    const generateHighLevelDbDescriptions = (0, generateHighLevelDbDescriptions_1.makeGenerateHighLevelDbDescriptions)(openAiClient);
    const generateAnnotatedCollectionSchema = (0, generateAnnotatedCollectionSchema_1.makeGenerateAnnotatedCollectionSchema)(openAiClient);
    return (0, braintrust_1.traced)(async () => {
        // Generate high-level database descriptions
        const highLevelDescriptions = await generateHighLevelDbDescriptions(databaseMetadata, llmOptions);
        // Create initial annotated database info.
        // NOTE(review): assumes `collectionDescriptions` is index-aligned with
        // `databaseMetadata.collections` and has at least as many entries;
        // otherwise this throws a TypeError -- confirm against the generator.
        const annotatedDatabaseInfo = {
            name: databaseName,
            description: highLevelDescriptions.databaseDescription,
            latestDate,
            collections: databaseMetadata.collections.map((collection, i) => ({
                name: collection.collectionName,
                description: highLevelDescriptions.collectionDescriptions[i].description,
                schema: collection.schema,
                examples: collection.exampleDocuments,
                // Shallow-copy each index so that adding `description` below does
                // not mutate the raw metadata that is handed to later LLM calls.
                indexes: Array.isArray(collection.indexes)
                    ? collection.indexes.map((index) => Object.assign({}, index))
                    : collection.indexes,
            })),
        };
        // Generate detailed schema descriptions for each collection. The calls
        // are awaited one after another (not in parallel).
        for (const [i, annotatedCollection] of annotatedDatabaseInfo.collections.entries()) {
            // `collections` above was mapped 1:1 from the raw metadata, so the
            // matching raw entry is simply the one at the same position.
            const { typeScriptSchema, indexDescriptions } = await generateAnnotatedCollectionSchema({
                collectionMetadata: databaseMetadata.collections[i],
                databaseMetadata,
                llm: llmOptions,
            });
            // Update the collection's schema with the annotated version
            annotatedCollection.schema = typeScriptSchema;
            // Update the collection's indexes with the annotated version.
            // Descriptions naming an index the collection does not have are ignored.
            for (const { name, description } of indexDescriptions) {
                const annotatedIndex = annotatedCollection.indexes.find((index) => index.name === name);
                if (annotatedIndex) {
                    annotatedIndex.description = description;
                }
            }
        }
        return annotatedDatabaseInfo;
    }, { name: "generateAnnotatedDatabaseInfo" });
}
exports.generateAnnotatedDatabaseInfo = generateAnnotatedDatabaseInfo;
//# sourceMappingURL=generateAnnotatedDatabaseInfo.js.map