UNPKG

mongoku

Version:

[![CI](https://github.com/huggingface/Mongoku/actions/workflows/ci.yml/badge.svg)](https://github.com/huggingface/Mongoku/actions/workflows/ci.yml)

316 lines (313 loc) 9.95 kB
import 'mongodb';
import { z } from 'zod';

/**
 * Read the validation settings configured on a collection.
 *
 * @param {object} client - Connected client exposing `db(name)`.
 * @param {string} dbName - Database that owns the collection.
 * @param {string} colName - Collection to inspect.
 * @returns {Promise<{hasSchema: boolean, validator: (object|null), validationLevel: (string|null), validationAction: (string|null)}>}
 *   All-null fields with `hasSchema: false` when the collection is not listed;
 *   otherwise the stored validator, with "strict" / "error" filled in when the
 *   collection options omit a level / action.
 */
async function getCollectionSchema(client, dbName, colName) {
  const db = client.db(dbName);
  const collections = await db.listCollections({ name: colName }, { nameOnly: false }).toArray();
  const colInfo = collections[0];
  if (!colInfo) {
    return { hasSchema: false, validator: null, validationLevel: null, validationAction: null };
  }
  const options = colInfo.options ?? {};
  const validator = options.validator ?? null;
  return {
    // An empty validator object ({}) constrains nothing, so it does not count as a schema.
    hasSchema: Boolean(validator) && Object.keys(validator).length > 0,
    validator,
    validationLevel: options.validationLevel ?? "strict",
    validationAction: options.validationAction ?? "error"
  };
}

/**
 * Find the first `$jsonSchema` object in a validator expression, looking at
 * the top level and then depth-first through `$and` clauses.
 *
 * @param {*} validator - Validator expression (or one clause of it).
 * @returns {(object|null)} The schema object, or null when none is found.
 */
function extractJsonSchema(validator) {
  // Guard: a null or primitive clause has no properties to inspect.
  if (typeof validator !== "object" || validator === null) {
    return null;
  }
  if (validator.$jsonSchema && typeof validator.$jsonSchema === "object") {
    return validator.$jsonSchema;
  }
  if (Array.isArray(validator.$and)) {
    for (const clause of validator.$and) {
      const extracted = extractJsonSchema(clause);
      if (extracted) {
        return extracted;
      }
    }
  }
  return null;
}

/** JSON Schema type names that are passed through unchanged. */
const STANDARD_TYPES = /* @__PURE__ */ new Set(["string", "number", "integer", "boolean", "object", "array", "null"]);

/** BSON aliases that translate to a plain JSON Schema `type`. */
const BSON_TYPE_MAP = /* @__PURE__ */ new Map([
  ["int", "integer"],
  // NOTE(review): normalizeBsonValue rewrites Long instances to { $numberLong },
  // which would fail an "integer" check — confirm longs reach us as JS numbers.
  ["long", "integer"],
  ["double", "number"],
  ["bool", "boolean"]
]);

/**
 * BSON aliases whose values normalizeBsonValue rewrites into a single-key
 * wrapper object; the schema fragment must describe that wrapper, not the
 * original scalar.
 */
const BSON_WRAPPER_KEYS = /* @__PURE__ */ new Map([
  ["objectId", "$oid"],
  ["date", "$date"],
  ["decimal", "$numberDecimal"]
]);

/**
 * Translate one `bsonType` alias into a JSON Schema fragment.
 *
 * @param {*} bsonType - A single alias taken from a `bsonType` keyword.
 * @returns {(object|null)} The fragment, or null when the alias cannot be modelled.
 */
function bsonTypeToFragment(bsonType) {
  const wrapperKey = BSON_WRAPPER_KEYS.get(bsonType);
  if (wrapperKey !== void 0) {
    return {
      type: "object",
      required: [wrapperKey],
      properties: { [wrapperKey]: { type: "string" } }
    };
  }
  const mapped = BSON_TYPE_MAP.get(bsonType);
  if (mapped !== void 0) {
    return { type: mapped };
  }
  return STANDARD_TYPES.has(bsonType) ? { type: bsonType } : null;
}

/** Keywords whose presence means a schema node actually constrains its value. */
const CONSTRAINT_KEYWORDS = [
  "type",
  "bsonType",
  "enum",
  "const",
  "anyOf",
  "oneOf",
  "allOf",
  "$ref",
  "properties",
  "patternProperties",
  "additionalProperties",
  "items",
  "prefixItems",
  "additionalItems",
  "not",
  "required",
  "propertyNames"
];

/**
 * Report whether a schema node carries none of the constraint keywords.
 *
 * @param {*} schema - Candidate schema node.
 * @returns {boolean} False for non-objects; true when no constraint keyword is defined.
 */
function isEffectivelyAnySchema(schema) {
  if (typeof schema !== "object" || schema === null) {
    return false;
  }
  return !CONSTRAINT_KEYWORDS.some((k) => schema[k] !== void 0);
}

/**
 * Substituted for a required property whose own schema constrains nothing, so
 * the converted schema still spells out the accepted value kinds explicitly.
 */
const ANY_NON_UNDEFINED_SCHEMA = {
  anyOf: [
    { type: "string" },
    { type: "number" },
    { type: "boolean" },
    { type: "object" },
    { type: "array" },
    { type: "null" }
  ]
};

/**
 * Convert a MongoDB `$jsonSchema` node into a schema that uses only standard
 * JSON Schema keywords, shaped to match the output of normalizeBsonValue.
 * The input is not mutated; a shallow copy is edited and returned.
 *
 * @param {*} schema - Schema node; non-objects are returned unchanged.
 * @returns {*} The converted node.
 */
function bsonSchemaToStandard(schema) {
  if (typeof schema !== "object" || schema === null) {
    return schema;
  }
  const out = { ...schema };

  if (out.bsonType) {
    const declared = Array.isArray(out.bsonType) ? out.bsonType : [out.bsonType];
    delete out.bsonType;
    const fragments = declared.map(bsonTypeToFragment);
    // If any listed alias cannot be modelled, leave the node unconstrained:
    // keeping only the known aliases would reject values the real validator accepts.
    const allKnown = fragments.length > 0 && fragments.every((f) => f !== null);
    if (allKnown && fragments.length === 1) {
      const [fragment] = fragments;
      Object.assign(out, fragment);
      if (fragment.properties) {
        // Wrapper types describe their own single property; nothing left to recurse into.
        return out;
      }
    } else if (allKnown) {
      out.anyOf = fragments;
    }
  }

  if (out.properties && typeof out.properties === "object") {
    const requiredKeys = new Set(Array.isArray(out.required) ? out.required : []);
    out.properties = Object.fromEntries(
      Object.entries(out.properties).map(([k, v]) => {
        const converted = bsonSchemaToStandard(v);
        if (requiredKeys.has(k) && isEffectivelyAnySchema(converted)) {
          return [k, ANY_NON_UNDEFINED_SCHEMA];
        }
        return [k, converted];
      })
    );
  }
  if (out.patternProperties && typeof out.patternProperties === "object") {
    out.patternProperties = Object.fromEntries(
      Object.entries(out.patternProperties).map(([k, v]) => [k, bsonSchemaToStandard(v)])
    );
  }
  // Boolean forms of these keywords are left as-is; only sub-schemas are converted.
  for (const key of ["additionalProperties", "additionalItems"]) {
    if (out[key] && typeof out[key] === "object") {
      out[key] = bsonSchemaToStandard(out[key]);
    }
  }
  for (const key of ["oneOf", "anyOf", "allOf"]) {
    if (Array.isArray(out[key])) {
      out[key] = out[key].map((v) => bsonSchemaToStandard(v));
    }
  }
  if (out.items) {
    out.items = Array.isArray(out.items)
      ? out.items.map((v) => bsonSchemaToStandard(v))
      : bsonSchemaToStandard(out.items);
  }
  return out;
}

/**
 * Recursively rewrite a document value into plain data: Date, ObjectId,
 * Decimal128 and Long instances become single-key wrapper objects
 * (`$date`, `$oid`, `$numberDecimal`, `$numberLong`); arrays and other
 * objects are copied with their members normalised.
 *
 * @param {*} value - Any document value.
 * @returns {*} The normalised value; primitives, null and undefined pass through.
 */
function normalizeBsonValue(value) {
  if (value === null || value === void 0 || typeof value !== "object") {
    return value;
  }
  if (value instanceof Date) {
    return { $date: value.toISOString() };
  }
  // BSON wrapper classes are recognised by constructor name plus the method we call.
  const ctorName = value.constructor?.name;
  if (ctorName === "ObjectId" && typeof value.toHexString === "function") {
    return { $oid: value.toHexString() };
  }
  if (ctorName === "Decimal128" && typeof value.toString === "function") {
    return { $numberDecimal: value.toString() };
  }
  if (ctorName === "Long" && typeof value.toString === "function") {
    return { $numberLong: value.toString() };
  }
  if (Array.isArray(value)) {
    return value.map(normalizeBsonValue);
  }
  const out = {};
  for (const [k, v] of Object.entries(value)) {
    out[k] = normalizeBsonValue(v);
  }
  return out;
}

/** Numeric BSON aliases named in the audit's fallback message. */
const PRECISE_BSON_NUMERIC_TYPES = /* @__PURE__ */ new Set(["int", "long", "double", "decimal"]);

/**
 * Report whether a schema, anywhere in its tree, declares one of the numeric
 * BSON aliases in PRECISE_BSON_NUMERIC_TYPES.
 *
 * @param {*} schema - Schema node to search.
 * @returns {boolean} True when such a `bsonType` is found.
 */
function schemaUsesPreciseBsonTypes(schema) {
  if (typeof schema !== "object" || schema === null) {
    return false;
  }
  if (schema.bsonType) {
    const types = Array.isArray(schema.bsonType) ? schema.bsonType : [schema.bsonType];
    if (types.some((t) => typeof t === "string" && PRECISE_BSON_NUMERIC_TYPES.has(t))) {
      return true;
    }
  }
  for (const key of ["properties", "patternProperties"]) {
    const props = schema[key];
    if (props && typeof props === "object" && Object.values(props).some((v) => schemaUsesPreciseBsonTypes(v))) {
      return true;
    }
  }
  for (const key of ["items", "additionalProperties", "additionalItems"]) {
    const v = schema[key];
    if (v && typeof v === "object" && schemaUsesPreciseBsonTypes(v)) {
      return true;
    }
  }
  for (const key of ["oneOf", "anyOf", "allOf"]) {
    const arr = schema[key];
    if (Array.isArray(arr) && arr.some((v) => schemaUsesPreciseBsonTypes(v))) {
      return true;
    }
  }
  return false;
}

/**
 * Build a function that explains why a document fails a `$jsonSchema`.
 *
 * @param {object} schema - The MongoDB `$jsonSchema` object.
 * @returns {(doc: object) => string[]} Returns an empty array when the
 *   converted schema accepts the document, otherwise a single formatted
 *   message. A generic message is returned when the schema could not be
 *   converted or validation itself throws.
 */
function buildDocumentValidator(schema) {
  const unparsable = "document does not match schema (could not parse schema with zod)";
  let validator = null;
  try {
    validator = z.fromJSONSchema(bsonSchemaToStandard(schema));
  } catch {
    // Best effort: an unconvertible schema degrades to the generic message below.
    validator = null;
  }
  return (doc) => {
    if (!validator) {
      return [unparsable];
    }
    try {
      const result = validator.safeParse(normalizeBsonValue(doc));
      return result.success ? [] : [z.prettifyError(result.error)];
    } catch {
      return [unparsable];
    }
  };
}

/**
 * Measure how many documents in a collection violate its `$jsonSchema`
 * validator and explain up to 20 of the offenders.
 *
 * @param {object} client - Connected client exposing `db(name)`.
 * @param {string} dbName - Database that owns the collection.
 * @param {string} colName - Collection to audit.
 * @param {{readPreference?: *, maxTimeMS?: number}} [opts] - Forwarded to the
 *   count and aggregate calls when set.
 * @returns {Promise<object>} Report with `nrecords`, `nInvalidDocuments`,
 *   `nValidDocuments`, `compliancePct`, `errors`, `warnings`, `hasSchema` and
 *   `tookMs`. Failed aggregations are reported through `warnings` rather than
 *   thrown.
 */
async function auditSchemaCompliance(client, dbName, colName, opts) {
  const coll = client.db(dbName).collection(colName);
  const schemaInfo = await getCollectionSchema(client, dbName, colName);
  if (!schemaInfo.hasSchema || !schemaInfo.validator) {
    return {
      nrecords: 0,
      nInvalidDocuments: 0,
      nValidDocuments: 0,
      compliancePct: 100,
      errors: [],
      warnings: [],
      hasSchema: false,
      tookMs: 0
    };
  }
  const jsonSchema = extractJsonSchema(schemaInfo.validator);
  if (!jsonSchema) {
    return {
      nrecords: 0,
      nInvalidDocuments: 0,
      nValidDocuments: 0,
      compliancePct: 100,
      errors: [],
      warnings: [
        "Validator is present but could not extract a $jsonSchema for auditing — validator may use non-schema operators"
      ],
      hasSchema: true,
      tookMs: 0
    };
  }

  const start = performance.now();
  const elapsedMs = () => Math.round(performance.now() - start);
  const aggOptions = {};
  if (opts?.readPreference) {
    aggOptions.readPreference = opts.readPreference;
  }
  if (opts?.maxTimeMS) {
    aggOptions.maxTimeMS = opts.maxTimeMS;
  }
  // Selects every document that does NOT satisfy the schema.
  const nonMatchingStage = { $match: { $nor: [{ $jsonSchema: jsonSchema }] } };

  const total = await coll.countDocuments({}, aggOptions);

  let nInvalidDocuments;
  try {
    const counted = await coll.aggregate([nonMatchingStage, { $count: "c" }], aggOptions).next();
    // $count emits no document at all when nothing matched.
    nInvalidDocuments = counted?.c ?? 0;
  } catch {
    return {
      nrecords: total,
      nInvalidDocuments: 0,
      nValidDocuments: total,
      compliancePct: 100,
      errors: [],
      warnings: ["Unable to count non-matching documents (aggregation failed)"],
      hasSchema: true,
      tookMs: elapsedMs()
    };
  }
  const nValidDocuments = total - nInvalidDocuments;
  const compliancePct = total > 0 ? nValidDocuments * 100 / total : 100;

  let sampleDocs;
  try {
    sampleDocs = await coll.aggregate([nonMatchingStage, { $limit: 20 }], aggOptions).toArray();
  } catch {
    // The counts above are still valid; report them instead of discarding the audit.
    return {
      nrecords: total,
      nInvalidDocuments,
      nValidDocuments,
      compliancePct,
      errors: [],
      warnings: ["Unable to fetch sample non-matching documents (aggregation failed)"],
      hasSchema: true,
      tookMs: elapsedMs()
    };
  }

  // Used when the server rejected a document but the local validator found nothing wrong.
  const fallbackMessage = schemaUsesPreciseBsonTypes(jsonSchema)
    ? "The validator uses BSON-specific numeric types (int/long/double/decimal) which cannot be distinguished from a JavaScript value alone — try inspecting the document directly in MongoDB."
    : "Failed to detect validation error";
  const validateOne = buildDocumentValidator(jsonSchema);
  const errors = sampleDocs.map((doc) => {
    const failures = validateOne(doc);
    return {
      message: failures.length > 0 ? failures.join("; ") : fallbackMessage,
      docId: doc._id,
      document: doc
    };
  });

  return {
    nrecords: total,
    nInvalidDocuments,
    nValidDocuments,
    compliancePct,
    errors,
    warnings: [],
    hasSchema: true,
    tookMs: elapsedMs()
  };
}

export { auditSchemaCompliance as a, getCollectionSchema as g };
//# sourceMappingURL=schema-BZonjzNJ.js.map