mongoku
Version:
[CI](https://github.com/huggingface/Mongoku/actions/workflows/ci.yml)
316 lines (313 loc) • 9.95 kB
JavaScript
import 'mongodb';
import { z } from 'zod';
/**
 * Look up the validation settings stored on a collection.
 *
 * @param {object} client - Connected client exposing `db(name)`.
 * @param {string} dbName - Database that owns the collection.
 * @param {string} colName - Collection to inspect.
 * @returns {Promise<{hasSchema: boolean, validator: (object|null), validationLevel: (string|null), validationAction: (string|null)}>}
 *   All-null settings with `hasSchema: false` when the collection is not listed;
 *   otherwise the stored validator (or `null`) with the level and action
 *   falling back to "strict" and "error" when they are not set.
 */
async function getCollectionSchema(client, dbName, colName) {
  const matches = await client
    .db(dbName)
    .listCollections({ name: colName }, { nameOnly: false })
    .toArray();

  const [info] = matches;
  if (!info) {
    return { hasSchema: false, validator: null, validationLevel: null, validationAction: null };
  }

  const settings = info.options ?? {};
  const validator = settings.validator ?? null;
  // An empty `{}` validator constrains nothing, so it does not count as a schema.
  const hasSchema = Boolean(validator) && Object.keys(validator).length > 0;

  return {
    hasSchema,
    validator,
    validationLevel: settings.validationLevel ?? "strict",
    validationAction: settings.validationAction ?? "error"
  };
}
/**
 * Find the `$jsonSchema` document inside a collection validator.
 *
 * The validator is searched depth-first: a direct `$jsonSchema` object wins,
 * otherwise each clause of a top-level `$and` array is searched in order and
 * the first hit is returned.
 *
 * @param {*} validator - Validator expression (or one clause of it).
 * @returns {(object|null)} The schema object, or `null` when none is found
 *   or when `validator` is not an object at all.
 */
function extractJsonSchema(validator) {
  // Clauses are searched recursively, so a null or primitive clause must not
  // blow up on property access.
  if (typeof validator !== "object" || validator === null) {
    return null;
  }
  if (validator.$jsonSchema && typeof validator.$jsonSchema === "object") {
    return validator.$jsonSchema;
  }
  if (Array.isArray(validator.$and)) {
    for (const clause of validator.$and) {
      const extracted = extractJsonSchema(clause);
      if (extracted) {
        return extracted;
      }
    }
  }
  return null;
}
/** Type names that are passed through unchanged when they appear in `bsonType`. */
const STANDARD_TYPES = /* @__PURE__ */ new Set(["string", "number", "integer", "boolean", "object", "array", "null"]);

/**
 * BSON type aliases and what they become. "objectId" and "date" are markers
 * that `bsonTypeFragment` expands into `{ $oid }` / `{ $date }` object shapes.
 */
const BSON_TYPE_MAP = {
  int: "integer",
  long: "integer",
  double: "number",
  bool: "boolean",
  decimal: "number",
  objectId: "objectId",
  date: "date"
};

/** Keywords whose presence means a schema restricts the value in some way. */
const CONSTRAINT_KEYWORDS = [
  "type",
  "bsonType",
  "enum",
  "const",
  "anyOf",
  "oneOf",
  "allOf",
  "$ref",
  "properties",
  "patternProperties",
  "additionalProperties",
  "items",
  "prefixItems",
  "additionalItems",
  "not",
  "required",
  "propertyNames"
];

/**
 * Tell whether a schema object carries none of the constraint keywords.
 *
 * @param {*} schema - Candidate schema.
 * @returns {boolean} `true` only for an object with no `CONSTRAINT_KEYWORDS`
 *   set; `false` for non-objects and `null`.
 */
function isEffectivelyAnySchema(schema) {
  if (typeof schema !== "object" || schema === null) {
    return false;
  }
  return !CONSTRAINT_KEYWORDS.some((k) => schema[k] !== void 0);
}

/**
 * Stand-in for a required property whose own schema constrains nothing.
 * Presumably this makes the downstream validator insist on a concrete value
 * instead of accepting a missing key; confirm against the zod conversion.
 */
const ANY_NON_UNDEFINED_SCHEMA = {
  anyOf: [
    { type: "string" },
    { type: "number" },
    { type: "boolean" },
    { type: "object" },
    { type: "array" },
    { type: "null" }
  ]
};

/**
 * Translate one `bsonType` entry.
 *
 * @param {*} bsonType - A single entry of a `bsonType` keyword.
 * @returns {(string|null)} The mapped name, or `null` when the entry is not recognised.
 */
function mapBsonType(bsonType) {
  // Own-property check: a plain lookup would also find inherited members such
  // as "toString" or "constructor" and hand back a function.
  if (typeof bsonType === "string" && Object.prototype.hasOwnProperty.call(BSON_TYPE_MAP, bsonType)) {
    return BSON_TYPE_MAP[bsonType];
  }
  return STANDARD_TYPES.has(bsonType) ? bsonType : null;
}

/**
 * Build the schema fragment for one mapped type name.
 *
 * @param {string} mappedType - Result of `mapBsonType`.
 * @returns {object} `{ type }` for ordinary types, or the `{ $oid }` / `{ $date }`
 *   wrapper-object shape for the two marker types.
 */
function bsonTypeFragment(mappedType) {
  if (mappedType === "objectId") {
    return { type: "object", required: ["$oid"], properties: { $oid: { type: "string" } } };
  }
  if (mappedType === "date") {
    return { type: "object", required: ["$date"], properties: { $date: { type: "string" } } };
  }
  return { type: mappedType };
}

/**
 * Convert a MongoDB `$jsonSchema` document into a schema that uses only
 * standard `type` keywords, recursing through nested sub-schemas.
 *
 * - A single recognised `bsonType` becomes `type` (or the wrapper-object shape
 *   for objectId/date); several become an `anyOf` union. If the schema already
 *   has an `anyOf`, the union is added under `allOf` so both constraints hold.
 * - Unrecognised `bsonType` entries are dropped, leaving that aspect unconstrained.
 * - A required property whose converted schema constrains nothing is replaced
 *   by `ANY_NON_UNDEFINED_SCHEMA`.
 *
 * The input is shallow-copied at every level and never mutated.
 *
 * @param {*} schema - Schema (or sub-schema) to convert.
 * @returns {*} The converted copy; non-object input is returned as is.
 */
function bsonSchemaToStandard(schema) {
  if (typeof schema !== "object" || schema === null) {
    return schema;
  }
  const out = { ...schema };

  if (out.bsonType) {
    const declared = Array.isArray(out.bsonType) ? out.bsonType : [out.bsonType];
    delete out.bsonType;
    const mapped = declared.map((t) => mapBsonType(t)).filter((t) => t !== null);
    if (mapped.length === 1) {
      Object.assign(out, bsonTypeFragment(mapped[0]));
    } else if (mapped.length > 1) {
      const typeUnion = mapped.map((t) => bsonTypeFragment(t));
      if (Array.isArray(out.anyOf)) {
        // Keep the author's own anyOf; the type union becomes an extra
        // conjunct instead of overwriting it.
        out.allOf = [...(Array.isArray(out.allOf) ? out.allOf : []), { anyOf: typeUnion }];
      } else {
        out.anyOf = typeUnion;
      }
    }
  }

  if (out.properties) {
    const requiredKeys = new Set(Array.isArray(out.required) ? out.required : []);
    out.properties = Object.fromEntries(
      Object.entries(out.properties).map(([key, sub]) => {
        const converted = bsonSchemaToStandard(sub);
        if (requiredKeys.has(key) && isEffectivelyAnySchema(converted)) {
          return [key, ANY_NON_UNDEFINED_SCHEMA];
        }
        return [key, converted];
      })
    );
  }

  if (out.patternProperties && typeof out.patternProperties === "object") {
    out.patternProperties = Object.fromEntries(
      Object.entries(out.patternProperties).map(([pattern, sub]) => [pattern, bsonSchemaToStandard(sub)])
    );
  }

  // Keywords holding a single sub-schema (booleans and other non-objects are left alone).
  for (const key of ["additionalProperties", "additionalItems", "not"]) {
    const sub = out[key];
    if (sub && typeof sub === "object" && !Array.isArray(sub)) {
      out[key] = bsonSchemaToStandard(sub);
    }
  }

  // Keywords holding a list of sub-schemas.
  for (const key of ["oneOf", "anyOf", "allOf"]) {
    if (Array.isArray(out[key])) {
      out[key] = out[key].map((sub) => bsonSchemaToStandard(sub));
    }
  }

  // `items` may be one schema or a tuple of schemas.
  if (out.items) {
    out.items = Array.isArray(out.items)
      ? out.items.map((sub) => bsonSchemaToStandard(sub))
      : bsonSchemaToStandard(out.items);
  }
  return out;
}
/**
 * Recursively rewrite driver value objects into plain wrapper objects.
 *
 * - `Date` becomes `{ $date: <ISO string> }`
 * - an object whose constructor is named "ObjectId" becomes `{ $oid: <hex> }`
 * - "Decimal128" becomes `{ $numberDecimal: <string> }`
 * - "Long" becomes `{ $numberLong: <string> }`
 *
 * Arrays are mapped element by element, any other object is copied key by key,
 * and primitives, `null` and `undefined` are returned untouched.
 *
 * @param {*} value - Value to normalise.
 * @returns {*} The normalised copy (or the same primitive).
 */
function normalizeBsonValue(value) {
  if (value == null || typeof value !== "object") {
    return value;
  }
  if (value instanceof Date) {
    return { $date: value.toISOString() };
  }

  // Types are recognised by constructor name plus the method used to
  // serialise them; a name match without the method falls through to the
  // generic handling below.
  switch (value.constructor?.name) {
    case "ObjectId":
      if (typeof value.toHexString === "function") {
        return { $oid: value.toHexString() };
      }
      break;
    case "Decimal128":
      if (typeof value.toString === "function") {
        return { $numberDecimal: value.toString() };
      }
      break;
    case "Long":
      if (typeof value.toString === "function") {
        return { $numberLong: value.toString() };
      }
      break;
    default:
      break;
  }

  if (Array.isArray(value)) {
    return value.map((item) => normalizeBsonValue(item));
  }

  const copy = {};
  Object.entries(value).forEach(([key, inner]) => {
    copy[key] = normalizeBsonValue(inner);
  });
  return copy;
}
/** BSON numeric type names checked for by `schemaUsesPreciseBsonTypes`. */
const PRECISE_BSON_NUMERIC_TYPES = /* @__PURE__ */ new Set(["int", "long", "double", "decimal"]);

/**
 * Report whether a schema, or any sub-schema reachable from it, declares one
 * of the BSON numeric types in `PRECISE_BSON_NUMERIC_TYPES`.
 *
 * Sub-schemas are looked for under `properties`, `patternProperties`, `items`
 * (single schema or tuple), `additionalProperties`, `additionalItems`, `not`,
 * `oneOf`, `anyOf` and `allOf`.
 *
 * @param {*} schema - Schema (or sub-schema) to inspect.
 * @returns {boolean} `true` when such a type is declared anywhere in the tree.
 */
function schemaUsesPreciseBsonTypes(schema) {
  if (typeof schema !== "object" || schema === null) {
    return false;
  }
  if (schema.bsonType) {
    const types = Array.isArray(schema.bsonType) ? schema.bsonType : [schema.bsonType];
    if (types.some((t) => typeof t === "string" && PRECISE_BSON_NUMERIC_TYPES.has(t))) {
      return true;
    }
  }

  // Keyword -> map of name/pattern to sub-schema.
  for (const key of ["properties", "patternProperties"]) {
    const props = schema[key];
    if (props && typeof props === "object" && Object.values(props).some((v) => schemaUsesPreciseBsonTypes(v))) {
      return true;
    }
  }

  // Keyword -> one sub-schema, or (for tuple-form `items`) a list of them.
  // A list must be walked element by element: passing the array itself to the
  // recursion would find no keywords on it and report false.
  for (const key of ["items", "additionalProperties", "additionalItems", "not"]) {
    const sub = schema[key];
    const candidates = Array.isArray(sub) ? sub : [sub];
    if (candidates.some((v) => schemaUsesPreciseBsonTypes(v))) {
      return true;
    }
  }

  // Keyword -> list of sub-schemas.
  for (const key of ["oneOf", "anyOf", "allOf"]) {
    const arr = schema[key];
    if (Array.isArray(arr) && arr.some((v) => schemaUsesPreciseBsonTypes(v))) {
      return true;
    }
  }
  return false;
}
/**
 * Compile a `$jsonSchema` document once and return a function that checks
 * individual documents against it.
 *
 * The schema is converted with `bsonSchemaToStandard` and handed to
 * `z.fromJSONSchema`. If that throws, the returned checker reports a single
 * generic failure for every document.
 *
 * @param {object} schema - The `$jsonSchema` document to compile.
 * @returns {(doc: object) => string[]} Checker returning `[]` when the
 *   normalised document parses, otherwise a one-element list of messages.
 */
function buildDocumentValidator(schema) {
  // A fresh array per call so callers never share a mutable result.
  const genericFailure = () => ["document does not match schema (could not parse schema with zod)"];

  let compiled;
  try {
    compiled = z.fromJSONSchema(bsonSchemaToStandard(schema));
  } catch {
    compiled = null;
  }

  return (doc) => {
    if (!compiled) {
      return genericFailure();
    }
    try {
      const outcome = compiled.safeParse(normalizeBsonValue(doc));
      return outcome.success ? [] : [z.prettifyError(outcome.error)];
    } catch {
      // Any throw while normalising, parsing or formatting ends up here and
      // is reported with the same generic message as a schema that failed to compile.
      return genericFailure();
    }
  };
}
/**
 * Measure how many documents of a collection satisfy its `$jsonSchema`
 * validator and explain a sample of the failures.
 *
 * Steps: read the collection's validator, extract its `$jsonSchema`, count all
 * documents, count those matching `$nor: [{ $jsonSchema }]`, then fetch up to
 * 20 of the non-matching documents and run each through a locally built
 * validator to produce a message.
 *
 * @param {object} client - Connected client exposing `db(name)`.
 * @param {string} dbName - Database name.
 * @param {string} colName - Collection name.
 * @param {{readPreference?: *, maxTimeMS?: number}} [opts] - Forwarded to the
 *   count and aggregate calls when set to a truthy value.
 * @returns {Promise<object>} Report with `nrecords`, `nInvalidDocuments`,
 *   `nValidDocuments`, `compliancePct`, `errors`, `warnings`, `hasSchema`
 *   and `tookMs`. Without a usable schema the counts are 0 and
 *   `compliancePct` is 100; if counting the non-matching documents fails, a
 *   warning is returned and all documents are reported as valid.
 */
async function auditSchemaCompliance(client, dbName, colName, opts) {
  const coll = client.db(dbName).collection(colName);
  const { hasSchema, validator } = await getCollectionSchema(client, dbName, colName);

  // Every exit path returns the same shape; only the overridden fields differ.
  const report = (overrides) => ({
    nrecords: 0,
    nInvalidDocuments: 0,
    nValidDocuments: 0,
    compliancePct: 100,
    errors: [],
    warnings: [],
    hasSchema: true,
    tookMs: 0,
    ...overrides
  });

  if (!hasSchema || !validator) {
    return report({ hasSchema: false });
  }

  const jsonSchema = extractJsonSchema(validator);
  if (!jsonSchema) {
    return report({
      warnings: [
        "Validator is present but could not extract a $jsonSchema for auditing — validator may use non-schema operators"
      ]
    });
  }

  const start = performance.now();
  const elapsedMs = () => Math.round(performance.now() - start);

  const aggOptions = {
    ...(opts?.readPreference ? { readPreference: opts.readPreference } : {}),
    ...(opts?.maxTimeMS ? { maxTimeMS: opts.maxTimeMS } : {})
  };

  // Selects documents that do NOT satisfy the schema.
  const nonMatchingStage = () => ({ $match: { $nor: [{ $jsonSchema: jsonSchema }] } });

  const total = await coll.countDocuments({}, aggOptions);
  const nInvalidDocuments = await coll
    .aggregate([nonMatchingStage(), { $count: "c" }], aggOptions)
    .next()
    // $count emits no row at all when nothing matches.
    .then((row) => row?.c ?? 0)
    .catch(() => null);

  if (nInvalidDocuments === null) {
    return report({
      nrecords: total,
      nValidDocuments: total,
      warnings: ["Unable to count non-matching documents (aggregation failed)"],
      tookMs: elapsedMs()
    });
  }

  const nValidDocuments = total - nInvalidDocuments;
  let compliancePct = 100;
  if (total > 0) {
    compliancePct = nValidDocuments * 100 / total;
  }

  const sampleDocs = await coll.aggregate([nonMatchingStage(), { $limit: 20 }], aggOptions).toArray();

  // Used when the local validator finds nothing wrong with a document that
  // the server-side query selected as non-matching.
  let fallbackMessage = "Failed to detect validation error";
  if (schemaUsesPreciseBsonTypes(jsonSchema)) {
    fallbackMessage = "The validator uses BSON-specific numeric types (int/long/double/decimal) which cannot be distinguished from a JavaScript value alone — try inspecting the document directly in MongoDB.";
  }

  const validateOne = buildDocumentValidator(jsonSchema);
  const errors = sampleDocs.map((doc) => {
    const failures = validateOne(doc);
    const message = failures.length > 0 ? failures.join("; ") : fallbackMessage;
    return { message, docId: doc._id, document: doc };
  });

  return report({
    nrecords: total,
    nInvalidDocuments,
    nValidDocuments,
    compliancePct,
    errors,
    tookMs: elapsedMs()
  });
}
export { auditSchemaCompliance as a, getCollectionSchema as g };
//# sourceMappingURL=schema-BZonjzNJ.js.map