mcard-js
Version:
MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers
84 lines • 3.67 kB
JavaScript
import { BaseValidator, ValidationError } from './BaseValidator';
/**
 * Validator for textual content types (plain text, JSON, XML/SVG, HTML, Markdown).
 *
 * Only `text/plain`, `application/json` and the XML family receive structural
 * checks; the remaining accepted MIME types are decoded but otherwise passed
 * through unchecked.
 */
export class TextValidator extends BaseValidator {
    /** MIME types this validator claims via {@link TextValidator#canValidate}. */
    static TEXT_MIME_TYPES = new Set([
        'text/plain', 'application/json', 'application/xml',
        'text/xml', 'image/svg+xml', 'text/html', 'text/markdown'
    ]);

    /**
     * Report whether this validator handles the given MIME type.
     *
     * @param {string} mimeType - Exact MIME type string (no parameter parsing is done).
     * @returns {boolean} `true` when the type is listed in `TEXT_MIME_TYPES`.
     */
    canValidate(mimeType) {
        return TextValidator.TEXT_MIME_TYPES.has(mimeType);
    }

    /**
     * Validate `content` according to `mimeType`.
     *
     * @param {string|BufferSource} content - Text, or bytes to be decoded as text.
     * @param {string} mimeType - MIME type selecting which checks to run.
     * @returns {void}
     * @throws {ValidationError} When a type-specific check rejects the content.
     */
    validate(content, mimeType) {
        // Decode first, even for types with no checks, so undecodable input
        // types fail the same way regardless of MIME type.
        const textContent = this.ensureString(content);
        switch (mimeType) {
            case 'text/plain':
                this.validatePlainText(textContent);
                break;
            case 'application/json':
                this.validateJson(textContent);
                break;
            case 'application/xml':
            case 'text/xml':
            case 'image/svg+xml':
                this.validateXml(textContent);
                break;
            default:
                // text/html, text/markdown and unknown types: nothing to check.
                break;
        }
    }

    /**
     * Return `content` as a string, decoding binary input when necessary.
     *
     * @param {string|BufferSource} content - String (returned as-is) or bytes.
     * @returns {string} The textual form of `content`.
     */
    ensureString(content) {
        if (typeof content === 'string') {
            return content;
        }
        // TextDecoder defaults to UTF-8 and, in its default non-fatal mode,
        // substitutes U+FFFD for malformed sequences instead of throwing.
        return new TextDecoder().decode(content);
    }

    /**
     * Apply plain-text sanity checks.
     *
     * @param {string} content - Text to check.
     * @returns {void}
     * @throws {ValidationError} If the text is empty, shorter than three
     *   characters after trimming, made up solely of control characters, or
     *   is a single unbroken token longer than 20 characters.
     */
    validatePlainText(content) {
        const trimmed = content.trim();
        if (!trimmed) {
            throw new ValidationError("Invalid content: empty text");
        }
        if (trimmed.length < 3) {
            throw new ValidationError("Invalid content: too short");
        }
        // trim() strips whitespace but leaves other C0 control characters
        // (e.g. U+0001, U+0007) in place, so a payload consisting only of
        // those would otherwise pass. This is the JS counterpart of the
        // Python rule `all(ord(c) < 32 for c in content.strip())`.
        if (/^[\u0000-\u001f]+$/.test(trimmed)) {
            throw new ValidationError("Invalid content: contains only control characters");
        }
        // Heuristic: real prose contains whitespace. A long run with none at
        // all is more likely an encoded blob or token than plain text.
        if (!/\s/.test(content) && content.length > 20) {
            throw new ValidationError("Invalid content: likely not plain text");
        }
    }

    /**
     * Check that `content` is strict JSON.
     *
     * @param {string} content - JSON text to check.
     * @returns {void}
     * @throws {ValidationError} If any line starts with `//`, or if
     *   `JSON.parse` rejects the text.
     */
    validateJson(content) {
        try {
            // Report comment lines with a dedicated message; JSON.parse would
            // reject them too, but only with a generic syntax error.
            const hasCommentLine = content
                .split('\n')
                .some((line) => line.trim().startsWith('//'));
            if (hasCommentLine) {
                throw new ValidationError("Invalid JSON content: contains comments");
            }
            JSON.parse(content);
        }
        catch (e) {
            // Keep the specific message raised above; everything else
            // collapses into the generic failure.
            if (e instanceof ValidationError) {
                throw e;
            }
            throw new ValidationError("Invalid JSON content");
        }
    }

    /**
     * Perform a lightweight XML shape check.
     *
     * Only verifies that the trimmed text starts with `<` and ends with `>`.
     * This is deliberately weak: it avoids pulling in an XML parser
     * dependency or relying on `DOMParser`, which is not available in every
     * runtime. It does not verify well-formedness.
     *
     * @param {string} content - XML text to check.
     * @returns {void}
     * @throws {ValidationError} If the text is not delimited by `<` and `>`.
     */
    validateXml(content) {
        const trimmed = content.trim();
        if (!trimmed.startsWith('<') || !trimmed.endsWith('>')) {
            throw new ValidationError("Invalid XML content");
        }
    }
}
//# sourceMappingURL=TextValidator.js.map