UNPKG

mcard-js

Version:

MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers

127 lines 6.21 kB
import { BaseValidator, ValidationError } from './BaseValidator';
import { BinarySignatureDetector } from '../detectors/BinarySignatureDetector';

/**
 * Validator for binary content (images, PDF, ZIP, audio, video, raw octets).
 *
 * Image content is checked for a minimum length and a recognised file
 * signature, PDF content for the `%PDF-` header, and ZIP content for a
 * minimum length. Other binary MIME types are accepted without inspection.
 */
export class BinaryValidator extends BaseValidator {
    /** MIME types that are explicitly treated as binary. */
    static BINARY_MIME_TYPES = new Set([
        'image/png',
        'image/jpeg',
        'image/gif',
        'image/bmp',
        'application/pdf',
        'application/zip',
        'video/mp4',
        'audio/wav',
        'application/octet-stream'
    ]);

    detector = new BinarySignatureDetector();

    /**
     * Report whether this validator handles the given MIME type.
     *
     * @param {string} mimeType - MIME type to test.
     * @returns {boolean} True for a listed binary type or any
     *   `image/*`, `audio/*` or `video/*` type.
     */
    canValidate(mimeType) {
        return (
            BinaryValidator.BINARY_MIME_TYPES.has(mimeType) ||
            mimeType.startsWith('image/') ||
            mimeType.startsWith('audio/') ||
            mimeType.startsWith('video/')
        );
    }

    /**
     * Validate content against the structural rules for its MIME type.
     *
     * @param {Uint8Array|ArrayBuffer|ArrayBufferView|string} content - Content to check.
     * @param {string} mimeType - Declared MIME type of the content.
     * @throws {ValidationError} If the content fails the check for its type.
     */
    validate(content, mimeType) {
        const contentBytes = this.ensureBytes(content);
        if (mimeType.startsWith('image/')) {
            this.validateImage(contentBytes, mimeType);
        }
        else if (mimeType === 'application/pdf') {
            this.validatePdf(contentBytes);
        }
        else if (mimeType === 'application/zip') {
            this.validateZip(contentBytes);
        }
    }

    /**
     * Normalise content to a `Uint8Array`.
     *
     * @param {Uint8Array|ArrayBuffer|ArrayBufferView|string} content - Raw content.
     * @returns {Uint8Array} The same instance for a `Uint8Array`, a view over
     *   the same memory for other buffer types, or the UTF-8 encoding of
     *   anything else.
     */
    ensureBytes(content) {
        if (content instanceof Uint8Array) {
            return content;
        }
        // Other typed arrays / DataView: re-view the same bytes. Without this
        // they would be stringified by TextEncoder and fail validation.
        if (ArrayBuffer.isView(content)) {
            return new Uint8Array(content.buffer, content.byteOffset, content.byteLength);
        }
        if (content instanceof ArrayBuffer) {
            return new Uint8Array(content);
        }
        return new TextEncoder().encode(content);
    }

    /**
     * Validate image bytes: minimum length for PNG/JPEG/GIF, then a signature
     * check for any MIME type that has entries in
     * `BinarySignatureDetector.SIGNATURES`.
     *
     * @param {Uint8Array} content - Image bytes.
     * @param {string} mimeType - Declared image MIME type.
     * @throws {ValidationError} If the content is truncated or does not start
     *   with any signature registered for the MIME type.
     */
    validateImage(content, mimeType) {
        if (mimeType === 'image/png' && content.length <= 8) {
            throw new ValidationError("Invalid PNG content: truncated file");
        }
        else if (mimeType === 'image/jpeg' && content.length <= 3) {
            throw new ValidationError("Invalid JPEG content: truncated file");
        }
        else if (mimeType === 'image/gif' && content.length <= 6) {
            throw new ValidationError("Invalid GIF content: truncated file");
        }

        // SIGNATURES is iterated as { hexSignature: mimeType }. A MIME type may
        // have several signatures, so content is accepted when it starts with
        // ANY of them rather than only the last one registered.
        const candidates = Object.entries(BinarySignatureDetector.SIGNATURES)
            .filter(([, mime]) => mime === mimeType)
            .map(([sig]) => sig);

        // No signature registered for this MIME type: nothing to check.
        if (candidates.length === 0) {
            return;
        }

        // Each signature is a hex string, so it covers sig.length / 2 bytes.
        const hasMatch = candidates.some(
            (sig) => this.toHex(content.slice(0, sig.length / 2)) === sig
        );
        if (!hasMatch) {
            throw new ValidationError(`Invalid ${mimeType} content: missing proper header`);
        }
    }

    /**
     * Validate that content starts with the `%PDF-` header.
     *
     * @param {Uint8Array} content - PDF bytes.
     * @throws {ValidationError} If the header is missing.
     */
    validatePdf(content) {
        if (!this.startsWithAscii(content, '%PDF-')) {
            throw new ValidationError("Invalid PDF content");
        }
    }

    /**
     * Validate ZIP content by length only; the signature is not inspected.
     *
     * @param {Uint8Array} content - ZIP bytes.
     * @throws {ValidationError} If the content is 4 bytes or shorter.
     */
    validateZip(content) {
        if (content.length <= 4) {
            throw new ValidationError("Invalid ZIP content");
        }
    }

    // Helpers

    /**
     * Encode bytes as a lowercase hex string, two characters per byte.
     *
     * @param {Uint8Array} content - Bytes to encode.
     * @returns {string} Hex representation.
     */
    toHex(content) {
        return Array.from(content, (b) => b.toString(16).padStart(2, '0')).join('');
    }

    /**
     * Test whether the bytes begin with the character codes of `str`.
     *
     * @param {Uint8Array} content - Bytes to inspect.
     * @param {string} str - Expected prefix, compared by `charCodeAt`.
     * @returns {boolean} True when every leading byte matches.
     */
    startsWithAscii(content, str) {
        if (content.length < str.length) {
            return false;
        }
        for (let i = 0; i < str.length; i++) {
            if (content[i] !== str.charCodeAt(i)) {
                return false;
            }
        }
        return true;
    }
}
//# sourceMappingURL=BinaryValidator.js.map