mcard-js
Version:
MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers
904 lines (893 loc) • 31.2 kB
JavaScript
import {
GTime,
HashValidator
} from "./chunk-7NKII2JA.js";
// src/model/detectors/BinaryDetector.ts
/**
 * Detects binary formats from leading magic bytes, falling back to a
 * file-extension lookup when no signature matches.
 */
var BinarySignatureDetector = class _BinarySignatureDetector {
  contentTypeName = "binary";
  // Magic-number table: each entry is [leading bytes, MIME type].
  static SIGNATURES = [
    [new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10]), "image/png"],
    [new Uint8Array([255, 216, 255]), "image/jpeg"],
    // GIF87a
    [new Uint8Array([71, 73, 70, 56, 55, 97]), "image/gif"],
    // GIF89a
    [new Uint8Array([71, 73, 70, 56, 57, 97]), "image/gif"],
    // BM
    [new Uint8Array([66, 77]), "image/bmp"],
    [new Uint8Array([0, 0, 1, 0]), "image/x-icon"],
    [new Uint8Array([0, 0, 2, 0]), "image/x-icon"],
    // %PDF
    [new Uint8Array([37, 80, 68, 70]), "application/pdf"],
    // PK..
    [new Uint8Array([80, 75, 3, 4]), "application/zip"],
    [new Uint8Array([31, 139, 8]), "application/gzip"],
    [new Uint8Array([82, 97, 114, 33, 26, 7, 0]), "application/x-rar-compressed"],
    [new Uint8Array([55, 122, 188, 175, 39, 28]), "application/x-7z-compressed"],
    [new Uint8Array([83, 81, 76, 105, 116, 101, 32, 102, 111, 114, 109, 97, 116, 32, 51, 0]), "application/x-sqlite3"]
  ];
  // Extension-to-MIME fallback, consulted only when no byte signature matched.
  static EXT_TO_MIME = {
    // Video
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".avi": "video/x-msvideo",
    ".mov": "video/quicktime",
    ".mkv": "video/x-matroska",
    ".wmv": "video/x-ms-wmv",
    ".flv": "video/x-flv",
    ".m4v": "video/x-m4v",
    // Audio
    ".mp3": "audio/mpeg",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
    ".aac": "audio/aac",
    ".m4a": "audio/mp4",
    ".wma": "audio/x-ms-wma",
    // Images (backup for when signature detection fails)
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".bmp": "image/bmp",
    ".ico": "image/x-icon",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    // Documents
    ".pdf": "application/pdf",
    // Archives
    ".zip": "application/zip",
    ".gz": "application/gzip",
    ".rar": "application/x-rar-compressed",
    ".7z": "application/x-7z-compressed",
    ".tar": "application/x-tar",
    // Database
    ".db": "application/x-sqlite3",
    ".sqlite": "application/x-sqlite3",
    ".sqlite3": "application/x-sqlite3",
    // Fonts
    ".woff": "font/woff",
    ".woff2": "font/woff2",
    ".ttf": "font/ttf",
    ".otf": "font/otf",
    ".eot": "application/vnd.ms-fontobject"
  };

  /**
   * @returns {number} 0.95 when a concrete MIME type is resolved, otherwise 0.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const resolved = this.getMimeType(contentSample, lines, firstLine, fileExtension);
    if (!resolved || resolved === "application/octet-stream") {
      return 0;
    }
    return 0.95;
  }

  /**
   * Resolve a MIME type: byte signature first, then the extension table,
   * then the generic "application/octet-stream".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const fromSignature = this.detectFromBytes(this.toBytes(contentSample));
    if (fromSignature !== "application/octet-stream") {
      return fromSignature;
    }
    if (!fileExtension) {
      return "application/octet-stream";
    }
    // Accept the extension with or without its leading dot, in any case.
    const lowered = fileExtension.toLowerCase();
    const key = lowered.startsWith(".") ? lowered : `.${lowered}`;
    return _BinarySignatureDetector.EXT_TO_MIME[key] || "application/octet-stream";
  }

  /**
   * Detect MIME type directly from bytes.
   */
  detectFromBytes(bytes) {
    // RIFF is a container; the concrete format is read from bytes 8..12.
    if (this.startsWith(bytes, new Uint8Array([82, 73, 70, 70]))) {
      return this.detectRiffFormat(bytes);
    }
    const hit = _BinarySignatureDetector.SIGNATURES.find(([magic]) => this.startsWith(bytes, magic));
    if (!hit) {
      return "application/octet-stream";
    }
    const matchedMime = hit[1];
    // ZIP containers get a second look for Office Open XML markers.
    return matchedMime === "application/zip" ? this.detectZipType(bytes) : matchedMime;
  }

  /** Return Uint8Array input unchanged; encode anything else with TextEncoder. */
  toBytes(content) {
    return content instanceof Uint8Array ? content : new TextEncoder().encode(content);
  }

  /** True when `data` begins with every byte of `prefix`. */
  startsWith(data, prefix) {
    if (data.length < prefix.length) {
      return false;
    }
    let idx = 0;
    while (idx < prefix.length && data[idx] === prefix[idx]) {
      idx++;
    }
    return idx === prefix.length;
  }

  /** Map the four-character RIFF form type at offset 8 to a MIME type. */
  detectRiffFormat(bytes) {
    if (bytes.length < 12) {
      return "application/octet-stream";
    }
    switch (new TextDecoder().decode(bytes.slice(8, 12))) {
      case "WAVE":
        return "audio/wav";
      case "WEBP":
        return "image/webp";
      default:
        return "application/octet-stream";
    }
  }

  /**
   * Distinguish Office Open XML packages from plain ZIP archives by looking
   * for well-known entry names in the first 2048 bytes.
   */
  detectZipType(bytes) {
    const head = new TextDecoder().decode(bytes.slice(0, 2048));
    const isOoxmlPackage = head.includes("[Content_Types].xml") && head.includes("_rels/.rels");
    if (!isOoxmlPackage) {
      return "application/zip";
    }
    const ooxmlKinds = [
      ["word/", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
      ["xl/", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
      ["ppt/", "application/vnd.openxmlformats-officedocument.presentationml.presentation"]
    ];
    const kind = ooxmlKinds.find(([folder]) => head.includes(folder));
    return kind ? kind[1] : "application/zip";
  }
};
// src/model/detectors/LanguageDetector.ts
/**
 * Heuristic source-code detector. Checks run in a fixed order
 * (Python, C/C++, JavaScript/JSX, TypeScript) and the first match wins.
 */
var ProgrammingLanguageDetector = class {
  contentTypeName = "code";

  /**
   * @returns {number} 0.95 when a language other than plain text is recognised, otherwise 0.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const mime = this.getMimeType(contentSample, lines, firstLine, fileExtension);
    return mime && mime !== "text/plain" ? 0.95 : 0;
  }

  /**
   * @param contentSample String or byte buffer; bytes are decoded with TextDecoder.
   * @param lines Lines of the sample (used for the short-snippet Python rule).
   * @param firstLine First line of the sample (used for shebang detection).
   * @param fileExtension Not consulted by this detector.
   * @returns {string} A language MIME type, or "text/plain" when nothing matches.
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    if (this.isPython(firstLine, text, lines)) {
      return "text/x-python";
    }
    const cType = this.detectCFamily(text);
    if (cType) return cType;
    const jsType = this.detectJsType(text);
    if (jsType) return jsType;
    if (this.isTypescript(text)) {
      return "text/typescript";
    }
    return "text/plain";
  }

  /**
   * Decide whether the sample looks like Python. Strong single indicators
   * return immediately; otherwise weaker patterns are counted.
   */
  isPython(firstLine, text, lines) {
    // An import statement only counts when it names one of a few stdlib modules.
    if (/^\s*import\s+(\w+|\w+\.\w+)/m.test(text) || /^\s*from\s+(\w+|\w+\.\w+)\s+import\s+/m.test(text)) {
      const stdLibs = ["os", "sys", "re", "json", "math", "random", "datetime"];
      if (stdLibs.some((lib) => text.includes(`import ${lib}`) || text.includes(`from ${lib}`))) {
        return true;
      }
    }
    if (firstLine.startsWith("#!") && firstLine.toLowerCase().includes("python")) return true;
    if (text.includes("if __name__ ==") && text.includes("__main__")) return true;
    // The "m" flag lets "^" match at any line start, consistent with the
    // class/decorator checks below; without it only a leading "def" matched.
    if (/^\s*def\s+\w+\s*\(/m.test(text) && !text.includes("function")) return true;
    if (/^\s*class\s+\w+\s*[\(:]/m.test(text)) return true;
    if (/^\s*@\w+/m.test(text)) return true;
    const patterns = [
      /\bif\b.*?:/,
      /\belif\b.*?:/,
      /\belse\s*:/,
      /\bfor\b.*?\bin\b.*?:/,
      /\bwhile\b.*?:/,
      /\btry\s*:/,
      /\bexcept\b.*?:/,
      /\bfinally\s*:/,
      /\bNone\b/,
      /\bTrue\b/,
      /\bFalse\b/,
      // f-string
      /f["'].*?\{.*?\}["']/,
      /\bdef\b/,
      /\bclass\b/,
      /\bimport\b/,
      /\bfrom\b/,
      /\blambda\b.*?:/
    ];
    const count = patterns.filter((p) => p.test(text)).length;
    const nonEmptyLines = lines.filter((l) => l.trim().length > 0).length;
    // Short snippets (at most five non-empty lines) need a single weak indicator.
    if (nonEmptyLines <= 5 && count >= 1) {
      return true;
    }
    return count >= 3;
  }

  /**
   * @returns {string|null} "text/x-c++", "text/x-c", or null when neither pattern set is convincing.
   */
  detectCFamily(text) {
    const cPatterns = [
      /#include\s*<.*?>/,
      /#include\s*".*?"/,
      /\b(int|void|char|float|double)\s+main\s*\(.*\)\s*\{/,
      /\bstruct\s+\w+\s*\{/,
      /#define\s+\w+/,
      /printf\(.*?\);/,
      /scanf\(.*?\);/
    ];
    const cppPatterns = [
      /\bclass\s+\w+\s*\{/,
      /\bnamespace\s+\w+\s*\{/,
      /\btemplate\s*<.*?>/,
      /::/,
      /\bstd::/,
      /\bcout\s*<</,
      /\bcin\s*>>/,
      /\bnew\s+\w+/,
      /\bdelete\s+\w+/,
      /#include\s*<iostream>/
    ];
    const cCount = cPatterns.filter((p) => p.test(text)).length;
    const cppCount = cppPatterns.filter((p) => p.test(text)).length;
    // C++ is checked first so C-style includes in C++ code do not win.
    if (cppCount >= 2 || cppCount >= 1 && text.includes("std::")) return "text/x-c++";
    if (cCount >= 2) return "text/x-c";
    return null;
  }

  /**
   * @returns {string|null} "text/jsx", "text/javascript", or null.
   */
  detectJsType(text) {
    const jsPatterns = [
      // function foo(
      /function\s+\w+\s*\(/,
      /\bconst\s+\w+\s*=/,
      /\blet\s+\w+\s*=/,
      /\bvar\s+\w+\s*=/,
      /\bimport\s+.*\s+from/,
      /\bexport\s+/,
      // Arrow func
      /\=\>\s*\{/,
      /console\.log\(/
    ];
    const jsxPatterns = [
      /<\w+(>|\s+.*?>)[\s\S]*?<\/\w+>/m,
      /<\w+\s+\/>/m,
      /className=/,
      /React\.createElement/
    ];
    const jsCount = jsPatterns.filter((p) => p.test(text)).length;
    const jsxCount = jsxPatterns.filter((p) => p.test(text)).length;
    if (jsxCount > 0 && (text.includes("import React") || text.includes('from "react"'))) return "text/jsx";
    if (jsxCount >= 2) return "text/jsx";
    // The former JSON.parse probe here could never change the outcome (its
    // only effect was guarded by an impossible condition), so it was removed.
    if (jsCount >= 2) return "text/javascript";
    return null;
  }

  /** True when at least two TypeScript-specific patterns occur in the text. */
  isTypescript(text) {
    const tsPatterns = [
      /:\s*(string|number|boolean|any|void|null|undefined)\b/,
      /\binterface\s+\w+\s*\{/,
      /\bclass\s+\w+\s+implements\s+\w+/,
      /\btype\s+\w+\s*=/,
      /\b(public|private|protected)\s+/,
      /\bnamespace\s+\w+\s*\{/,
      // Generics (simple check)
      /<\w+>/
    ];
    return tsPatterns.filter((p) => p.test(text)).length >= 2;
  }
};
// src/model/detectors/MarkupDetectors.ts
/**
 * Detects XML-like markup and maps it to XML, SVG or HTML MIME types.
 */
var XMLDetector = class _XMLDetector {
  contentTypeName = "xml";
  static XML_DECLARATION = /^\s*<\?xml/i;
  static BASIC_TAG_PAIR = /<(\w+)[^>]*>.*?<\/\1>/s;

  /**
   * Score how strongly the sample looks like XML.
   *
   * @param contentSample String or byte buffer; bytes are decoded with TextDecoder.
   * @param lines Not consulted by this detector.
   * @param firstLine First line of the sample, checked for an XML declaration.
   * @param fileExtension Optional extension; ".xml" (any case) scores 0.95.
   * @returns {number} Confidence clamped to [0, 1].
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    let confidence = 0;
    if (fileExtension && fileExtension.toLowerCase() === ".xml") {
      confidence = Math.max(confidence, 0.95);
    }
    if (_XMLDetector.XML_DECLARATION.test(firstLine) || text.trim().startsWith("<?xml")) {
      confidence = Math.max(confidence, 0.95);
    }
    if (text.includes("<") && text.includes(">") && text.includes("</")) {
      confidence = Math.max(confidence, 0.5);
      if (_XMLDetector.BASIC_TAG_PAIR.test(text)) {
        confidence = Math.max(confidence, 0.7);
      }
    }
    // An HTML doctype lowers the score by 0.4.
    if (text.toLowerCase().includes("<!doctype html")) {
      if (confidence > 0.3) confidence -= 0.4;
    }
    return Math.min(Math.max(confidence, 0), 1);
  }

  /**
   * @returns {string} "application/xml", "image/svg+xml", "text/html" or "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    // Compare case-insensitively so ".XML" is handled exactly like ".xml",
    // matching the extension check in detect().
    if (fileExtension && fileExtension.toLowerCase() === ".xml") return "application/xml";
    const lowered = text.toLowerCase();
    if (lowered.includes("<svg")) return "image/svg+xml";
    if (lowered.includes("<html") || lowered.includes("<!doctype html")) return "text/html";
    if (this.detect(contentSample, lines, firstLine, fileExtension) > 0.5) return "application/xml";
    return "text/plain";
  }
};
/**
 * Scores how likely a sample is Markdown by counting Markdown constructs.
 */
var MarkdownDetector = class _MarkdownDetector {
  contentTypeName = "markdown";
  static MD_PATTERNS = [
    // ATX Headers
    /^#{1,6}\s+\S+/,
    // List items
    /^\s*[\*\+\-]\s+\S+/,
    // Ordered list items
    /^\s*\d+\.\s+\S+/,
    // Inline code
    /`{1,3}[^`]+`{1,3}/,
    // Links
    /\[[^\]]+\]\([^\)]+\)/,
    // Images
    /!\[[^\]]+\]\([^\)]+\)/,
    // Blockquotes
    /^\s*>.*/
  ];
  static SETEXT_HEADER = /^.*\n(?:={3,}|-{3,})\s*$/m;

  /**
   * @param contentSample String or byte buffer; bytes are decoded with TextDecoder.
   * @param lines Sample lines; the first 20 are matched against MD_PATTERNS.
   * @param firstLine Not consulted by this detector.
   * @param fileExtension Optional extension; ".md"/".markdown" (any case) scores 0.95.
   * @returns {number} Confidence clamped to [0, 1].
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    let confidence = 0;
    if (fileExtension && [".md", ".markdown"].includes(fileExtension.toLowerCase())) {
      confidence = Math.max(confidence, 0.95);
    }
    let mdFeatures = 0;
    if (_MarkdownDetector.SETEXT_HEADER.test(text)) mdFeatures += 2;
    for (const line of lines.slice(0, 20)) {
      if (_MarkdownDetector.MD_PATTERNS.some((p) => p.test(line))) {
        mdFeatures++;
      }
    }
    const hasCodeFence = text.includes("```");
    if (hasCodeFence) mdFeatures++;
    // Tiers are tested from strongest to weakest. The previous ladder tested
    // "> 1" before "> 3" and "> 5", so the 0.8 and 0.9 tiers were unreachable.
    let featureScore = 0;
    if (hasCodeFence && mdFeatures > 3) featureScore = 0.95;
    else if (hasCodeFence && mdFeatures > 1) featureScore = 0.85;
    else if (mdFeatures > 5) featureScore = 0.9;
    else if (mdFeatures > 3) featureScore = 0.8;
    else if (mdFeatures > 1) featureScore = 0.6;
    confidence = Math.max(confidence, featureScore);
    const stripped = text.trim();
    if (stripped.startsWith("{") && stripped.endsWith("}") || stripped.startsWith("[") && stripped.endsWith("]")) {
      try {
        JSON.parse(text);
        // Content that parses as JSON has its score lowered by 0.4.
        if (confidence > 0.3) confidence -= 0.4;
      } catch {
        // Not valid JSON: no penalty applies.
      }
    }
    if (stripped.startsWith("<") && text.includes("<?xml")) {
      if (confidence > 0.3) confidence -= 0.4;
    }
    return Math.min(Math.max(confidence, 0), 1);
  }

  /**
   * @returns {string} "text/markdown" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? "text/markdown" : "text/plain";
  }
};
/**
 * Low-confidence fallback detector that always reports "text/plain".
 */
var PlainTextDetector = class _PlainTextDetector {
  contentTypeName = "text";
  static IMAGE_EXTS = [".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"];

  /**
   * @returns {number} 0.1 for empty input, 0 for image/PDF extensions,
   *   0.8 for short text where every line has a comma, otherwise 0.15.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const isEmptyInput = !contentSample && lines.length === 0;
    if (isEmptyInput) {
      return 0.1;
    }
    if (fileExtension) {
      const lowered = fileExtension.toLowerCase();
      const isImageOrPdf = lowered === ".pdf" || _PlainTextDetector.IMAGE_EXTS.includes(lowered);
      if (isImageOrPdf) {
        return 0;
      }
    }
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    // Fewer than five lines, all containing a comma.
    const isShortSample = text.includes(",") && lines.length < 5;
    const everyLineHasComma = lines.length > 0 && lines.every((entry) => entry.includes(","));
    return isShortSample && everyLineHasComma ? 0.8 : 0.15;
  }

  /** Always "text/plain". */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    return "text/plain";
  }
};
// src/model/detectors/DataFormatDetectors.ts
/**
 * Detects SQL by counting keyword occurrences in the leading lines.
 */
var SQLDetector = class _SQLDetector {
  contentTypeName = "sql";
  // Upper-case keywords; lines are upper-cased before matching.
  static KEYWORDS = [
    "SELECT ",
    "INSERT ",
    "UPDATE ",
    "DELETE ",
    "CREATE ",
    "DROP ",
    "ALTER ",
    "FROM ",
    "WHERE ",
    "JOIN ",
    "TABLE ",
    "INTO ",
    "VALUES ",
    "SET ",
    "PRIMARY KEY"
  ];

  /**
   * @param contentSample Not consulted; scoring uses `lines` only. (The former
   *   full decode and upper-casing of the content was never read and has been removed.)
   * @param lines Sample lines; the first 10 are scanned for keywords.
   * @param firstLine Not consulted by this detector.
   * @param fileExtension Optional extension; ".sql" (any case) scores 0.95.
   * @returns {number} 0.95 for a .sql extension, 0.85 for two or more keyword
   *   hits, 0.6 for exactly one hit, otherwise 0.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    let confidence = 0;
    if (fileExtension && fileExtension.toLowerCase() === ".sql") {
      confidence = Math.max(confidence, 0.95);
    }
    let hits = 0;
    for (const line of lines.slice(0, 10)) {
      const upperLine = line.toUpperCase();
      // Each keyword counts at most once per line.
      hits += _SQLDetector.KEYWORDS.filter((kw) => upperLine.includes(kw)).length;
    }
    if (hits >= 2) confidence = Math.max(confidence, 0.85);
    else if (hits === 1) confidence = Math.max(confidence, 0.6);
    return Math.min(confidence, 1);
  }

  /**
   * @returns {string} "text/x-sql" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? "text/x-sql" : "text/plain";
  }
};
/**
 * Detects JSON documents by structural shape and a full parse.
 */
var JSONDetector = class {
  contentTypeName = "json";

  /**
   * @returns {number} With a ".json" extension: 0.95 if the text parses, else 0.6.
   *   Otherwise 0.9 for bracketed text that parses and has no comment
   *   lines among its first five lines, else 0.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    const hasJsonExtension = Boolean(fileExtension) && fileExtension.toLowerCase() === ".json";
    if (hasJsonExtension) {
      return this.verifyJsonStructure(text) ? 0.95 : 0.6;
    }
    const body = text.trim();
    const isObjectShaped = body.startsWith("{") && body.endsWith("}");
    const isArrayShaped = body.startsWith("[") && body.endsWith("]");
    if (!isObjectShaped && !isArrayShaped) {
      return 0;
    }
    // A comment marker at the start of any of the first five lines scores 0.
    const hasLeadingComment = lines.slice(0, 5).some((raw) => {
      const trimmed = raw.trim();
      return trimmed.startsWith("//") || trimmed.startsWith("/*");
    });
    if (hasLeadingComment) {
      return 0;
    }
    return this.verifyJsonStructure(text) ? 0.9 : 0;
  }

  /**
   * @returns {string} "application/json" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const score = this.detect(contentSample, lines, firstLine, fileExtension);
    return score > 0.5 ? "application/json" : "text/plain";
  }

  /** True when JSON.parse accepts the text. */
  verifyJsonStructure(text) {
    try {
      JSON.parse(text);
    } catch {
      return false;
    }
    return true;
  }
};
/**
 * Detects YAML documents. Structural features are only scored when the first
 * non-empty line is the "---" document marker.
 */
var YAMLDetector = class _YAMLDetector {
  contentTypeName = "yaml";
  static YAML_START_PATTERNS = [/^---\s*$/, /^%YAML/];
  static KEY_VALUE_PATTERN = /^\s*[\w.-]+:\s+(?![=\{\[])/;
  static LIST_ITEM_PATTERN = /^\s*-\s+[\w\'\"]/;

  /**
   * @returns {number} The highest applicable score: 0.95 for a .yaml/.yml
   *   extension, 0.9 when the first line is a YAML start marker, and
   *   0.5/0.75/0.9 for increasing feature counts in a "---"-led document.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    const candidates = [0];
    if (fileExtension && [".yaml", ".yml"].includes(fileExtension.toLowerCase())) {
      candidates.push(0.95);
    }
    const startPatterns = _YAMLDetector.YAML_START_PATTERNS;
    if (startPatterns.some((pattern) => pattern.test(firstLine))) {
      candidates.push(0.9);
    }
    // A start marker at the beginning of any line is worth two features.
    const markerAnywhere = startPatterns.some((pattern) => new RegExp(pattern.source, "m").test(text));
    const structuralLines = lines.slice(0, 20).filter((raw) => {
      const entry = raw.trim();
      return _YAMLDetector.KEY_VALUE_PATTERN.test(entry) || _YAMLDetector.LIST_ITEM_PATTERN.test(entry);
    });
    const featureTotal = (markerAnywhere ? 2 : 0) + structuralLines.length;
    const leadingLine = lines.find((raw) => raw.trim().length > 0) || "";
    if (leadingLine.trim() === "---") {
      if (featureTotal > 5) candidates.push(0.9);
      else if (featureTotal > 3) candidates.push(0.75);
      else if (featureTotal > 1) candidates.push(0.5);
    }
    return Math.min(Math.max(...candidates), 1);
  }

  /**
   * @returns {string} "application/x-yaml" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const score = this.detect(contentSample, lines, firstLine, fileExtension);
    if (score > 0.5) {
      return "application/x-yaml";
    }
    return "text/plain";
  }
};
/**
 * Detects comma-separated data by comparing per-line comma counts.
 */
var CSVDetector = class {
  contentTypeName = "csv";

  /**
   * @param contentSample Not consulted; scoring uses `lines` only. (The former
   *   decode of the content produced a value that was never read.)
   * @param lines Sample lines; the first 10 are taken and blank ones dropped.
   * @param firstLine Not consulted by this detector.
   * @param fileExtension Optional extension; ".csv" (any case) yields 0.95
   *   when the structure verifies and 0.6 otherwise.
   * @returns {number} Confidence in [0, 1].
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    if (fileExtension && fileExtension.toLowerCase() === ".csv") {
      return this.verifyCsvStructure(lines) ? 0.95 : 0.6;
    }
    return this.analyzeCsvContent(lines);
  }

  /**
   * @returns {string} "text/csv" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    return this.detect(contentSample, lines, firstLine, fileExtension) > 0.5 ? "text/csv" : "text/plain";
  }

  /**
   * True when the sampled lines have a consistent column count, allowing the
   * first (header) line to differ. Equivalent to an analyzeCsvContent score
   * of at least 0.8, so the two methods cannot drift apart.
   */
  verifyCsvStructure(lines) {
    return this.analyzeCsvContent(lines) >= 0.8;
  }

  /**
   * Score the sampled lines:
   *   0.9 - every line has the same number of commas
   *   0.8 - every line after the first has the same number of commas
   *   0.5 - every line has a comma but the counts vary
   *   0   - no lines, or some non-blank line has no comma
   */
  analyzeCsvContent(lines) {
    if (!lines || lines.length === 0) return 0;
    const sampleLines = lines.slice(0, 10).filter((l) => l.trim().length > 0);
    if (sampleLines.length === 0 || !sampleLines.every((l) => l.includes(","))) return 0;
    // Every sampled line contains a comma, so each count is at least 1.
    const counts = sampleLines.map((l) => (l.match(/,/g) || []).length);
    if (new Set(counts).size === 1) return 0.9;
    // Counts differ, so there are at least two lines; ignore the header row.
    if (new Set(counts.slice(1)).size === 1) return 0.8;
    return 0.5;
  }
};
// src/model/detectors/OBJDetector.ts
/**
 * Detects OBJ-style geometry text by counting lines that begin with one of
 * the known command prefixes.
 */
var OBJDetector = class _OBJDetector {
  contentTypeName = "obj";
  // Line prefixes counted as commands: vertex, texture/normal, face, group, object, etc.
  static COMMANDS = ["v ", "vt ", "vn ", "f ", "g ", "o ", "s ", "mtllib ", "usemtl "];

  /**
   * @returns {number} 0 when any programming keyword appears in the text;
   *   otherwise the larger of the extension score (0.95 for ".obj") and the
   *   command-count score (0.7 / 0.8 / 0.9).
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    const text = typeof contentSample === "string" ? contentSample : new TextDecoder().decode(contentSample);
    const extensionScore = fileExtension && fileExtension.toLowerCase() === ".obj" ? 0.95 : 0;
    // Drop blank lines and "#" comment lines, then inspect the first 20 survivors.
    const candidates = lines
      .map((raw) => raw.trim())
      .filter((entry) => entry.length > 0 && !entry.startsWith("#"))
      .slice(0, 20);
    const commandCount = candidates.filter(
      (entry) => _OBJDetector.COMMANDS.some((prefix) => entry.startsWith(prefix))
    ).length;
    let structureScore = 0;
    if (commandCount > 10) {
      structureScore = 0.9;
    } else if (commandCount > 5) {
      structureScore = 0.8;
    } else if (commandCount >= 2) {
      structureScore = 0.7;
    }
    // Any of these keywords overrides both scores, including the extension.
    const codeKeywords = ["def ", "class ", "import ", "function ", "var ", "let ", "const "];
    const looksLikeCode = codeKeywords.some((keyword) => text.includes(keyword));
    if (looksLikeCode) {
      return 0;
    }
    return Math.min(Math.max(extensionScore, structureScore), 1);
  }

  /**
   * @returns {string} "application/3d-obj" when detect() exceeds 0.5, otherwise "text/plain".
   */
  getMimeType(contentSample, lines, firstLine, fileExtension) {
    const score = this.detect(contentSample, lines, firstLine, fileExtension);
    return score > 0.5 ? "application/3d-obj" : "text/plain";
  }
};
// src/model/detectors/registry.ts
/**
 * Ordered collection of content detectors; the highest-confidence detector
 * decides the MIME type, with earlier detectors winning ties.
 */
var DetectorRegistry = class {
  detectors;
  constructor() {
    this.detectors = [
      new BinarySignatureDetector(),
      // Programming languages
      new ProgrammingLanguageDetector(),
      // Structured data
      new XMLDetector(),
      new JSONDetector(),
      new OBJDetector(),
      // Markup
      new MarkdownDetector(),
      // Data formats (lower priority)
      new SQLDetector(),
      new CSVDetector(),
      new YAMLDetector(),
      // Fallback
      new PlainTextDetector()
    ];
  }
  /**
   * Detect content type and return the most likely MIME type.
   */
  detect(contentSample, lines, firstLine, fileExtension) {
    // One or two lines, each containing at most two commas, are reported as
    // plain text before any detector runs.
    const commaCount = (entry) => (entry.match(/,/g) || []).length;
    const isShortCommaText =
      lines.length > 0 &&
      lines.length < 3 &&
      lines.every((entry) => entry.includes(",")) &&
      lines.filter((entry) => entry.trim()).every((entry) => commaCount(entry) <= 2);
    if (isShortCommaText) {
      return "text/plain";
    }
    let leader = { confidence: 0, mime: "text/plain" };
    for (const candidate of this.detectors) {
      const confidence = candidate.detect(contentSample, lines, firstLine, fileExtension);
      // Strictly greater: on a tie the earlier detector keeps the lead.
      if (!(confidence > leader.confidence)) {
        continue;
      }
      const mime = candidate.getMimeType(contentSample, lines, firstLine, fileExtension);
      if (!mime) {
        continue;
      }
      leader = { confidence, mime };
      // Stop scanning once a detector reports 0.99 or higher.
      if (confidence >= 0.99) {
        break;
      }
    }
    return leader.mime;
  }
};
// Module-level DetectorRegistry instance; ContentTypeInterpreter.detectContentType delegates to it.
var registry = new DetectorRegistry();
// src/model/ContentTypeInterpreter.ts
/**
 * Static facade over the detector registry: MIME detection, extension
 * suggestion, and binary/text classification helpers.
 */
var ContentTypeInterpreter = class {
  static MIME_TO_EXT = {
    "text/plain": ".txt",
    "application/json": ".json",
    "text/csv": ".csv",
    "text/x-python": ".py",
    "text/javascript": ".js",
    "text/jsx": ".jsx",
    "text/typescript": ".ts",
    "text/x-c": ".c",
    "text/x-c++": ".cpp",
    "application/xml": ".xml",
    "text/html": ".html",
    "application/x-yaml": ".yaml",
    "text/markdown": ".md",
    "text/x-sql": ".sql",
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/bmp": ".bmp",
    "image/x-icon": ".ico",
    "image/webp": ".webp",
    "image/svg+xml": ".svg",
    "application/pdf": ".pdf",
    "application/zip": ".zip",
    "application/gzip": ".gz",
    "application/x-rar-compressed": ".rar",
    "application/x-7z-compressed": ".7z",
    "application/x-sqlite3": ".db",
    "audio/wav": ".wav"
    // Add more as needed
  };
  /**
   * Convenience method to detect MIME type only.
   * Matches the API expected by MCard/PCard/VCard.
   */
  static detect(content) {
    return this.detectContentType(content).mimeType;
  }
  /**
   * Detect content type and suggest extension.
   *
   * Only the first 8192 characters/bytes are sampled, and only the first
   * 20 lines of that sample are passed to the detectors as `lines`.
   *
   * @param content Content string or binary buffer
   * @param fileExtension Optional file extension hint
   * @returns Object containing detected mimeType and suggested extension
   */
  static detectContentType(content, fileExtension) {
    let lines = [];
    let firstLine = "";
    let textSample = "";
    if (typeof content === "string") {
      textSample = content.slice(0, 8192);
    } else {
      textSample = new TextDecoder("utf-8", { fatal: false }).decode(content.slice(0, 8192));
    }
    lines = textSample.split("\n").slice(0, 20);
    firstLine = lines[0] || "";
    const mimeType = registry.detect(content, lines, firstLine, fileExtension);
    let extension = this.getExtension(mimeType);
    if (fileExtension && extension) {
      // Keep the caller's spelling (case) when it names the same extension.
      // NOTE(review): MIME_TO_EXT values already start with ".", so the
      // `.${extension}` comparison can only match a double-dot hint - confirm intent.
      if (fileExtension.toLowerCase() === extension || fileExtension.toLowerCase() === `.${extension}`) {
        extension = fileExtension;
      }
    }
    if (!extension && fileExtension) {
      extension = fileExtension;
    }
    if (!extension) {
      extension = ".txt";
    }
    return { mimeType, extension };
  }
  /**
   * Look up the extension for a MIME type.
   *
   * @returns {string} The mapped extension, or "" for unknown types. Only own
   *   entries of MIME_TO_EXT are consulted, so names inherited from
   *   Object.prototype (e.g. "constructor") are treated as unknown.
   */
  static getExtension(mimeType) {
    return Object.hasOwn(this.MIME_TO_EXT, mimeType) ? this.MIME_TO_EXT[mimeType] : "";
  }
  /**
   * True when a MIME type denotes textual content. Besides "text/*" this
   * covers JSON, XML, JavaScript/ECMAScript, and the text-based types the
   * detectors in this module emit outside "text/*": YAML
   * ("application/x-yaml") and OBJ ("application/3d-obj").
   */
  static isTextMimeType(mimeType) {
    if (mimeType.startsWith("text/")) return true;
    const textMarkers = ["json", "xml", "javascript", "ecmascript", "yaml", "3d-obj"];
    return textMarkers.some((marker) => mimeType.includes(marker));
  }
  /**
   * Check if content should be treated as binary.
   *
   * @param content String or byte buffer; only inspected when no mimeType is given.
   * @param mimeType Optional known MIME type; when provided it alone decides.
   */
  static isBinaryContent(content, mimeType) {
    if (mimeType) {
      return !this.isTextMimeType(mimeType);
    }
    if (typeof content === "string") return false;
    return !this.isTextMimeType(this.detectContentType(content).mimeType);
  }
  /** True when the (lower-cased) name ends with one of the listed long-line extensions. */
  static isKnownLongLineExtension(extension) {
    if (!extension) return false;
    const ext = extension.toLowerCase();
    return [".min.js", ".min.css", ".map", ".svg", ".json", ".geojson"].some((e) => ext.endsWith(e));
  }
  /**
   * Heuristic binary check over at most the first 32 KiB of a byte sample:
   * true when more than 10% of bytes are NUL or more than 20% are control
   * characters other than tab, LF and CR. Samples under 512 bytes return false.
   */
  static isUnstructuredBinary(sample) {
    if (sample.length < 512) return false;
    let nullCount = 0;
    let controlCount = 0;
    const len = Math.min(sample.length, 32 * 1024);
    for (let i = 0; i < len; i++) {
      const byte = sample[i];
      if (byte === 0) {
        nullCount++;
      }
      // NUL also counts as a control character here.
      if (byte < 32 && byte !== 9 && byte !== 10 && byte !== 13) {
        controlCount++;
      }
    }
    const nullRatio = nullCount / len;
    const controlRatio = controlCount / len;
    return nullRatio > 0.1 || controlRatio > 0.2;
  }
  /**
   * True when a sample of at least 32768 bytes contains no LF or CR byte at
   * all and the type is not already known.
   */
  static hasPathologicalLines(sample, isKnownType) {
    if (isKnownType || sample.length < 32768) return false;
    for (let i = 0; i < sample.length; i++) {
      if (sample[i] === 10 || sample[i] === 13) return false;
    }
    return true;
  }
};
// src/types/dots.ts
/**
 * Build the DOTS metadata record for an MCard: a "Carrier" in the "Data"
 * plane with the "InvariantContent" EOS role.
 *
 * @param tightRefs Values stored under `tightRefs` (defaults to an empty array).
 * @param looseRefs Values stored under `looseRefs` (defaults to an empty array).
 */
function createMCardDOTSMetadata(tightRefs = [], looseRefs = []) {
  const metadata = {
    role: "Carrier", // CARRIER
    eosRole: "InvariantContent", // INVARIANT_CONTENT
    plane: "Data" // DATA
  };
  metadata.tightRefs = tightRefs;
  metadata.looseRefs = looseRefs;
  return metadata;
}
/**
 * Build the DOTS metadata record for a PCard in the "Control" plane with the
 * "GenerativeLens" EOS role.
 *
 * @param isLens When truthy the role is "Lens"; otherwise it is "Chart".
 * @param tightRefs Values stored under `tightRefs` (defaults to an empty array).
 * @param looseRefs Values stored under `looseRefs` (defaults to an empty array).
 */
function createPCardDOTSMetadata(isLens, tightRefs = [], looseRefs = []) {
  let role = "Chart"; // CHART
  if (isLens) {
    role = "Lens"; // LENS
  }
  const metadata = {
    role,
    eosRole: "GenerativeLens", // GENERATIVE_LENS
    plane: "Control" // CONTROL
  };
  metadata.tightRefs = tightRefs;
  metadata.looseRefs = looseRefs;
  return metadata;
}
/**
 * Build the DOTS metadata record for a VCard: an "Arena" in the
 * "Application" plane with the "SovereignDecision" EOS role. No reference
 * lists are included.
 */
function createVCardDOTSMetadata() {
  const metadata = {};
  metadata.role = "Arena"; // ARENA
  metadata.eosRole = "SovereignDecision"; // SOVEREIGN_DECISION
  metadata.plane = "Application"; // APPLICATION
  return metadata;
}
// src/model/MCard.ts
/**
 * Content-addressed card: content bytes plus their hash, a g_time stamp,
 * the detected content type and the hash algorithm name.
 */
var MCard = class _MCard {
  content;
  hash;
  g_time;
  contentType;
  // Defaulting to specific string or null
  hashFunction;
  constructor(content, hash, g_time, contentType, hashFunction) {
    this.content = content;
    this.hash = hash;
    this.g_time = g_time;
    this.contentType = contentType;
    this.hashFunction = hashFunction;
  }
  /**
   * Create a new MCard from content
   *
   * @param content String (encoded as UTF-8) or byte buffer.
   * @param hashAlgorithm Algorithm name passed to HashValidator.computeHash
   *   and GTime.stampNow; defaults to "sha256".
   * @throws {Error} When content is null, undefined, or empty.
   */
  static async create(content, hashAlgorithm = "sha256") {
    if (content === null || content === void 0) {
      throw new Error("Content cannot be null or undefined");
    }
    const bytes = typeof content === "string" ? new TextEncoder().encode(content) : content;
    if (bytes.length === 0) {
      throw new Error("Content cannot be empty");
    }
    const hash = await HashValidator.computeHash(bytes, hashAlgorithm);
    const g_time = GTime.stampNow(hashAlgorithm);
    const contentType = ContentTypeInterpreter.detect(bytes);
    return new _MCard(bytes, hash, g_time, contentType, hashAlgorithm);
  }
  /**
   * Create an MCard from existing data (e.g., from database)
   *
   * String content is encoded to UTF-8 bytes, as create() does, so that
   * getContent() returns bytes and getContentAsText() can decode them.
   * Byte buffers are stored as given. The hash is taken as supplied and
   * is not recomputed.
   */
  static fromData(content, hash, g_time) {
    const bytes = typeof content === "string" ? new TextEncoder().encode(content) : content;
    const alg = GTime.getHashAlgorithm(g_time);
    const contentType = ContentTypeInterpreter.detect(bytes);
    return new _MCard(bytes, hash, g_time, contentType, alg);
  }
  /**
   * Get content as text (UTF-8 decoded)
   */
  getContentAsText() {
    return new TextDecoder().decode(this.content);
  }
  /**
   * Get content as raw bytes
   */
  getContent() {
    return this.content;
  }
  /**
   * Convert to plain object
   *
   * Note: `content` is the UTF-8 decoded text, not the raw bytes.
   */
  toObject() {
    return {
      hash: this.hash,
      content: this.getContentAsText(),
      g_time: this.g_time,
      contentType: this.contentType,
      hashFunction: this.hashFunction
    };
  }
  /**
   * Get DOTS vocabulary metadata for this MCard
   *
   * Returns the DOTS role information that positions this MCard
   * in the Double Operadic Theory of Systems framework.
   *
   * MCard is always a CARRIER object in the Data Plane.
   *
   * @param tightRefs - Optional array of prerequisite MCard hashes (vertical composition)
   * @param looseRefs - Optional array of alternative MCard hashes (horizontal composition)
   * @returns DOTSMetadata describing this card's role in the compositional system
   *
   * @example
   * ```typescript
   * const card = await MCard.create('Hello World');
   * const meta = card.getDOTSMetadata();
   * console.log(meta.role); // 'Carrier'
   * console.log(meta.plane); // 'Data'
   * ```
   */
  getDOTSMetadata(tightRefs = [], looseRefs = []) {
    return createMCardDOTSMetadata(tightRefs, looseRefs);
  }
};
export {
ContentTypeInterpreter,
createPCardDOTSMetadata,
createVCardDOTSMetadata,
MCard
};