mcard-js
Version:
MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers
127 lines • 6.21 kB
JavaScript
import { BaseValidator, ValidationError } from './BaseValidator';
import { BinarySignatureDetector } from '../detectors/BinarySignatureDetector';
/**
 * Validator for binary content (images, PDF, ZIP, audio, video).
 *
 * Images are checked for a minimum length (PNG/JPEG/GIF) and for a leading
 * magic-byte signature taken from `BinarySignatureDetector.SIGNATURES`; PDFs
 * must start with `%PDF-`; ZIP archives are only checked for a minimum length.
 * Every other MIME type is accepted without inspection.
 */
export class BinaryValidator extends BaseValidator {
    /** MIME types this validator claims explicitly (see `canValidate`). */
    static BINARY_MIME_TYPES = new Set([
        'image/png', 'image/jpeg', 'image/gif', 'image/bmp',
        'application/pdf', 'application/zip', 'video/mp4',
        'audio/wav', 'application/octet-stream'
    ]);

    /**
     * Detector instance. None of the methods defined in this class read it;
     * presumably kept for the base class or external callers - TODO confirm.
     */
    detector = new BinarySignatureDetector();

    /**
     * Tells whether this validator handles the given MIME type.
     *
     * @param {string} mimeType - MIME type to test.
     * @returns {boolean} `true` for the explicitly listed types and for any
     *   `image/*`, `audio/*` or `video/*` type; `false` otherwise, including
     *   when `mimeType` is not a string.
     */
    canValidate(mimeType) {
        // A predicate should answer "no" rather than throw on a missing type.
        if (typeof mimeType !== 'string') {
            return false;
        }
        return (BinaryValidator.BINARY_MIME_TYPES.has(mimeType) ||
            mimeType.startsWith('image/') ||
            mimeType.startsWith('audio/') ||
            mimeType.startsWith('video/'));
    }

    /**
     * Validates `content` against the structural rules for `mimeType`.
     *
     * @param {Uint8Array|ArrayBuffer|ArrayBufferView|string} content - Payload.
     * @param {string} mimeType - Declared MIME type of the payload.
     * @returns {void}
     * @throws {ValidationError} When the payload fails the check for its type.
     */
    validate(content, mimeType) {
        const contentBytes = this.ensureBytes(content);
        if (mimeType.startsWith('image/')) {
            this.validateImage(contentBytes, mimeType);
        }
        else if (mimeType === 'application/pdf') {
            this.validatePdf(contentBytes);
        }
        else if (mimeType === 'application/zip') {
            this.validateZip(contentBytes);
        }
    }

    /**
     * Normalises a payload to a `Uint8Array`.
     *
     * @param {Uint8Array|ArrayBuffer|ArrayBufferView|string} content - Payload.
     * @returns {Uint8Array} The same instance when `content` already is a
     *   `Uint8Array`; a view over the same memory for an `ArrayBuffer` or any
     *   other buffer view; otherwise the UTF-8 encoding of `content`.
     */
    ensureBytes(content) {
        if (content instanceof Uint8Array) {
            return content;
        }
        // Raw buffers and non-Uint8 views must be wrapped, not stringified:
        // TextEncoder would otherwise encode e.g. "[object ArrayBuffer]".
        if (content instanceof ArrayBuffer) {
            return new Uint8Array(content);
        }
        if (ArrayBuffer.isView(content)) {
            return new Uint8Array(content.buffer, content.byteOffset, content.byteLength);
        }
        return new TextEncoder().encode(content);
    }

    /**
     * Checks an image payload for truncation and for a known file signature.
     *
     * @param {Uint8Array} content - Image bytes.
     * @param {string} mimeType - Image MIME type.
     * @returns {void}
     * @throws {ValidationError} When a PNG/JPEG/GIF payload is too short, or
     *   when signatures are registered for `mimeType` and none of them matches
     *   the start of `content`.
     */
    validateImage(content, mimeType) {
        if (mimeType === 'image/png' && content.length <= 8) {
            throw new ValidationError("Invalid PNG content: truncated file");
        }
        else if (mimeType === 'image/jpeg' && content.length <= 3) {
            throw new ValidationError("Invalid JPEG content: truncated file");
        }
        else if (mimeType === 'image/gif' && content.length <= 6) {
            throw new ValidationError("Invalid GIF content: truncated file");
        }
        // SIGNATURES maps a hex-encoded signature to a MIME type, and one MIME
        // type may own several signatures (GIF87a and GIF89a). The content is
        // valid as soon as ANY signature registered for the type matches.
        let hasSignature = false;
        for (const [sig, mime] of Object.entries(BinarySignatureDetector.SIGNATURES)) {
            if (mime !== mimeType) {
                continue;
            }
            hasSignature = true;
            // Two hex digits per byte. toHex() emits lowercase digits, so the
            // key is lower-cased in case the table is written in uppercase.
            const prefixHex = this.toHex(content.slice(0, sig.length / 2));
            if (prefixHex === sig.toLowerCase()) {
                return;
            }
        }
        // Types with no registered signature are accepted as-is.
        if (hasSignature) {
            throw new ValidationError(`Invalid ${mimeType} content: missing proper header`);
        }
    }

    /**
     * Checks that a PDF payload starts with the `%PDF-` marker.
     *
     * @param {Uint8Array} content - PDF bytes.
     * @returns {void}
     * @throws {ValidationError} When the marker is missing.
     */
    validatePdf(content) {
        // %PDF- is 25 50 44 46 2d
        if (!this.startsWithAscii(content, '%PDF-')) {
            throw new ValidationError("Invalid PDF content");
        }
    }

    /**
     * Checks that a ZIP payload is longer than four bytes.
     *
     * NOTE(review): only the length is checked; the `PK` magic bytes are not
     * verified here - confirm whether that is intentional.
     *
     * @param {Uint8Array} content - ZIP bytes.
     * @returns {void}
     * @throws {ValidationError} When `content` has four bytes or fewer.
     */
    validateZip(content) {
        if (content.length <= 4) {
            throw new ValidationError("Invalid ZIP content");
        }
    }

    // Helpers

    /**
     * Encodes bytes as a lowercase hex string, two digits per byte.
     *
     * @param {Iterable<number>} content - Byte values.
     * @returns {string} Hex representation of `content`.
     */
    toHex(content) {
        return Array.from(content, (b) => b.toString(16).padStart(2, '0')).join('');
    }

    /**
     * Tells whether `content` begins with the character codes of `str`.
     *
     * @param {Uint8Array} content - Bytes to inspect.
     * @param {string} str - Expected prefix; compared by UTF-16 code unit, so
     *   it is only meaningful for ASCII text.
     * @returns {boolean} `true` when every code unit of `str` equals the byte
     *   at the same index of `content`.
     */
    startsWithAscii(content, str) {
        if (content.length < str.length) {
            return false;
        }
        for (let i = 0; i < str.length; i++) {
            if (content[i] !== str.charCodeAt(i)) {
                return false;
            }
        }
        return true;
    }
}
//# sourceMappingURL=BinaryValidator.js.map