UNPKG

mcard-js

Version:

MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers

84 lines 3.67 kB
import { BaseValidator, ValidationError } from './BaseValidator';

/**
 * Validator for text-based content types (plain text, JSON, XML/SVG, HTML,
 * Markdown).
 *
 * Only plain text, JSON and the XML family receive content-level checks;
 * the remaining accepted MIME types are decoded to a string but not
 * inspected further.
 */
export class TextValidator extends BaseValidator {
  /** MIME types for which {@link TextValidator#canValidate} returns true. */
  static TEXT_MIME_TYPES = new Set([
    'text/plain',
    'application/json',
    'application/xml',
    'text/xml',
    'image/svg+xml',
    'text/html',
    'text/markdown',
  ]);

  /**
   * Report whether this validator handles the given MIME type.
   *
   * @param {string} mimeType - Exact MIME type string (no parameters).
   * @returns {boolean} True when the type is in TEXT_MIME_TYPES.
   */
  canValidate(mimeType) {
    return TextValidator.TEXT_MIME_TYPES.has(mimeType);
  }

  /**
   * Validate content against the rules for its MIME type.
   *
   * @param {string|BufferSource} content - Text, or bytes to be decoded.
   * @param {string} mimeType - MIME type selecting the rule set.
   * @throws {ValidationError} When the content fails the selected checks.
   */
  validate(content, mimeType) {
    const textContent = this.ensureString(content);
    if (mimeType === 'text/plain') {
      this.validatePlainText(textContent);
    } else if (mimeType === 'application/json') {
      this.validateJson(textContent);
    } else if (['application/xml', 'text/xml', 'image/svg+xml'].includes(mimeType)) {
      this.validateXml(textContent);
    }
    // Other MIME types (e.g. text/html, text/markdown) have no content checks.
  }

  /**
   * Return the content as a string, decoding bytes when necessary.
   *
   * @param {string|BufferSource} content - String (returned unchanged) or
   *   bytes decoded with a default-constructed TextDecoder (UTF-8).
   * @returns {string} The textual content.
   */
  ensureString(content) {
    if (typeof content === 'string') {
      return content;
    }
    return new TextDecoder().decode(content);
  }

  /**
   * Apply plain-text heuristics.
   *
   * @param {string} content - Text to check.
   * @throws {ValidationError} When the text is empty, shorter than three
   *   characters after trimming, consists solely of control characters, or
   *   is a single whitespace-free token longer than 20 characters.
   */
  validatePlainText(content) {
    const trimmed = content.trim();
    if (!trimmed) {
      throw new ValidationError("Invalid content: empty text");
    }
    if (trimmed.length < 3) {
      throw new ValidationError("Invalid content: too short");
    }
    // trim() strips whitespace only, so non-whitespace control characters
    // (code points below 32 such as NUL, SOH, BEL) survive it. Content made
    // up entirely of such characters is not text.
    if (/^[\x00-\x1f]+$/.test(trimmed)) {
      throw new ValidationError("Invalid content: contains only control characters");
    }
    // Heuristic: a long run with no whitespace at all is probably an
    // encoded blob or identifier rather than prose.
    if (!/\s/.test(content) && content.length > 20) {
      throw new ValidationError("Invalid content: likely not plain text");
    }
  }

  /**
   * Check that the content is strict JSON.
   *
   * @param {string} content - Candidate JSON text.
   * @throws {ValidationError} "Invalid JSON content: contains comments" when
   *   any line starts with `//`; "Invalid JSON content" for any other
   *   failure.
   */
  validateJson(content) {
    try {
      // Detect line comments first so the caller gets a more specific
      // message than the generic parse failure.
      const lines = content.split('\n');
      if (lines.some((line) => line.trim().startsWith('//'))) {
        throw new ValidationError("Invalid JSON content: contains comments");
      }
      JSON.parse(content);
    } catch (e) {
      if (e instanceof ValidationError) throw e;
      throw new ValidationError("Invalid JSON content");
    }
  }

  /**
   * Perform a lightweight XML sanity check.
   *
   * Only verifies that the trimmed text starts with `<` and ends with `>`.
   * This deliberately avoids a parser dependency and does not prove the
   * document is well-formed.
   *
   * @param {string} content - Candidate XML text.
   * @throws {ValidationError} When the trimmed text is not delimited by
   *   angle brackets.
   */
  validateXml(content) {
    const trimmed = content.trim();
    if (!trimmed.startsWith('<') || !trimmed.endsWith('>')) {
      throw new ValidationError("Invalid XML content");
    }
  }
}
//# sourceMappingURL=TextValidator.js.map