s3db.js
Version:
Use AWS S3, the world's most reliable document storage, as a database with this ORM.
440 lines (385 loc) • 12.1 kB
JavaScript
/**
* Advanced metadata encoding for S3
* Pattern-specific optimizations for common data types
*/
import { encode as toBase62, decode as fromBase62 } from './base62.js';
/**
 * Dictionary of common lowercase words mapped to a single control-character code.
 *
 * The table is created without a prototype so that looking up arbitrary user
 * strings such as "constructor" or "__proto__" can never resolve to a member
 * inherited from Object.prototype. It is frozen because it is a shared constant.
 */
const DICTIONARY = Object.freeze(Object.assign(Object.create(null), {
  // Status values
  'active': '\x01',
  'inactive': '\x02',
  'pending': '\x03',
  'completed': '\x04',
  'failed': '\x05',
  'deleted': '\x06',
  'archived': '\x07',
  'draft': '\x08',
  // Booleans
  'true': '\x10',
  'false': '\x11',
  'yes': '\x12',
  'no': '\x13',
  '1': '\x14',
  '0': '\x15',
  // HTTP methods (lowercase for matching)
  'get': '\x20',
  'post': '\x21',
  'put': '\x22',
  'delete': '\x23',
  'patch': '\x24',
  'head': '\x25',
  'options': '\x26',
  // Common words
  'enabled': '\x30',
  'disabled': '\x31',
  'success': '\x32',
  'error': '\x33',
  'warning': '\x34',
  'info': '\x35',
  'debug': '\x36',
  'critical': '\x37',
  // Null-like values
  'null': '\x40',
  'undefined': '\x41',
  'none': '\x42',
  'empty': '\x43',
  'nil': '\x44',
}));

/**
 * Reverse dictionary (code -> word) used when decoding.
 * Also prototype-free, so a payload like "toString" is never mistaken for a code.
 */
const REVERSE_DICTIONARY = Object.freeze(Object.assign(
  Object.create(null),
  Object.fromEntries(Object.entries(DICTIONARY).map(([word, code]) => [code, word]))
));
/**
 * Report whether `str` has the 8-4-4-4-12 hexadecimal UUID layout.
 * Matching is case-insensitive.
 */
function isUUID(str) {
  const uuidLayout = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
  return uuidLayout.test(str);
}
/**
 * Report whether `str` consists only of hex digits (any case), is at least
 * 8 characters long, and has an even length (i.e. whole bytes).
 */
function isHexString(str) {
  const hexDigitsOnly = /^[0-9a-f]+$/i;
  if (!hexDigitsOnly.test(str)) return false;
  const { length } = str;
  return length >= 8 && length % 2 === 0;
}
/**
 * Detect if a string is a numeric timestamp (Unix seconds or milliseconds).
 *
 * Only the canonical decimal spelling is accepted: the timestamp encoding in
 * this file stores just the numeric value, so a zero-padded input such as
 * "01700000000" would come back as "1700000000" after decoding.
 *
 * @param {string} str - Candidate value.
 * @returns {boolean} True when `str` is an unpadded integer inside one of the
 *   two accepted ranges.
 */
function isTimestamp(str) {
  if (!/^\d+$/.test(str)) return false;
  const num = Number.parseInt(str, 10);
  // Reject zero-padded spellings; String(num) must reproduce the input exactly.
  if (String(num) !== String(str)) return false;
  // Unix timestamps: 1000000000 (2001) to 2000000000 (2033)
  // Millisecond timestamps: 1000000000000 (2001) to 2000000000000 (2033)
  return (num >= 1000000000 && num <= 2000000000) ||
    (num >= 1000000000000 && num <= 2000000000000);
}
/**
 * Report whether `str` looks like an ISO 8601 date-time:
 * YYYY-MM-DDTHH:mm:ss, optionally followed by exactly three fractional digits
 * and optionally by "Z" or a ±HH:MM offset.
 */
function isISOTimestamp(str) {
  const isoDateTime = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?(Z|[+-]\d{2}:\d{2})?$/;
  return isoDateTime.test(str);
}
/**
 * Detect if a string is a non-negative integer that base62 would shorten.
 *
 * The value must survive a parse/print round trip: zero-padded strings ("007")
 * and integers beyond Number.MAX_SAFE_INTEGER are rejected because encoding
 * them through a JS number would silently change the stored text.
 *
 * @param {string} str - Candidate value.
 * @returns {boolean} True when `str` is a canonical safe integer whose base62
 *   form is strictly shorter than `str`.
 */
function isBeneficialInteger(str) {
  if (!/^\d+$/.test(str)) return false;
  const num = Number.parseInt(str, 10);
  // Guard against precision loss and dropped leading zeros before encoding.
  if (!Number.isSafeInteger(num) || String(num) !== String(str)) return false;
  // Only beneficial if base62 would be shorter
  return toBase62(num).length < str.length;
}
/** HTTP method names whose all-uppercase spelling is kept via the 'U' flag. */
const UPPERCASE_HTTP_METHODS = new Set(['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']);

/**
 * Parse `str` as a decimal integer, returning it only when printing the number
 * reproduces `str` exactly (no leading zeros, no precision loss); otherwise null.
 */
function parseCanonicalInteger(str) {
  if (!/^\d+$/.test(str)) return null;
  const num = Number.parseInt(str, 10);
  return Number.isSafeInteger(num) && String(num) === str ? num : null;
}

/**
 * Check that an ISO string would be reproduced exactly by the 'i' decoding:
 * UTC `toISOString()` output, with ".000" dropped when the original had no millis.
 */
function isoRoundTrips(str, unixMs, hasMillis) {
  if (Number.isNaN(unixMs)) return false;
  const iso = new Date(unixMs).toISOString();
  const decoded = !hasMillis && iso.endsWith('.000Z') ? iso.replace('.000', '') : iso;
  return decoded === str;
}

/**
 * Encode a value using pattern detection.
 *
 * The value is stringified and the first matching strategy wins, in this order:
 * dictionary word, ISO timestamp, numeric timestamp, UUID, hex string, integer,
 * printable ASCII, base64 (characters above U+00FF), URL encoding (everything else).
 * Every strategy is applied only when decoding reproduces the exact input
 * string; otherwise the value falls through to a later, lossless strategy.
 *
 * @param {*} value - Value to encode; non-strings are converted with String().
 * @returns {{encoded: string, method: string, original?: string, savings?: string}}
 *   The prefixed encoded text, the strategy name, and (for some strategies)
 *   the original text and an estimated savings percentage.
 */
export function advancedEncode(value) {
  // Handle null and undefined
  if (value === null) return { encoded: 'd' + DICTIONARY['null'], method: 'dictionary' };
  if (value === undefined) return { encoded: 'd' + DICTIONARY['undefined'], method: 'dictionary' };

  const str = String(value);

  // Empty string
  if (str === '') return { encoded: '', method: 'none' };

  // Dictionary (most compact). Only exact lowercase words, or an all-uppercase
  // HTTP method, are eligible: any other casing ("Active") could not be restored
  // on decode. The own-property check keeps "constructor"/"__proto__" from
  // matching members inherited from Object.prototype.
  const lowerStr = str.toLowerCase();
  const isUpperCaseMethod = UPPERCASE_HTTP_METHODS.has(str);
  if (
    (str === lowerStr || isUpperCaseMethod) &&
    Object.prototype.hasOwnProperty.call(DICTIONARY, lowerStr)
  ) {
    return {
      encoded: `d${DICTIONARY[lowerStr]}${isUpperCaseMethod ? 'U' : ''}`,
      method: 'dictionary',
      original: str,
    };
  }

  // ISO timestamp: stored as base62 epoch milliseconds plus a flag
  // ('m' = original had milliseconds, 's' = it did not). Strings with a
  // numeric offset, no zone designator, or an invalid date are skipped because
  // decoding always yields the UTC "Z" form.
  if (isISOTimestamp(str)) {
    const unixMs = new Date(str).getTime();
    const hasMillis = str.includes('.');
    if (isoRoundTrips(str, unixMs, hasMillis)) {
      const encoded = toBase62(unixMs);
      return {
        encoded: `i${hasMillis ? 'm' : 's'}${encoded}`,
        method: 'iso-timestamp',
        original: str,
        savings: `${Math.round((1 - (encoded.length + 2) / str.length) * 100)}%`,
      };
    }
  }

  // Integers are only encoded from their canonical spelling (see helper).
  const canonicalInt = parseCanonicalInteger(str);

  // Numeric timestamp optimization with base62 (checked before hex)
  if (canonicalInt !== null && isTimestamp(str)) {
    const encoded = toBase62(canonicalInt);
    if (encoded.length < str.length) {
      return {
        encoded: `t${encoded}`,
        method: 'timestamp',
        original: str,
        savings: `${Math.round((1 - encoded.length / str.length) * 100)}%`,
      };
    }
  }

  // UUID optimization: 36 chars -> base64 of the 16 raw bytes.
  // Lowercase only: decoding emits lowercase hex.
  if (str === lowerStr && isUUID(str)) {
    const hex = str.replace(/-/g, '');
    const binary = Buffer.from(hex, 'hex');
    return {
      encoded: `u${binary.toString('base64')}`,
      method: 'uuid',
      original: str,
      savings: `${Math.round((1 - 24/36) * 100)}%` // base64 of 16 bytes = ~24 chars
    };
  }

  // Hex string optimization (MD5, SHA, ObjectId). Lowercase only, same reason as UUID.
  if (str === lowerStr && isHexString(str)) {
    const binary = Buffer.from(str, 'hex');
    return {
      encoded: `h${binary.toString('base64')}`,
      method: 'hex',
      original: str,
      savings: '33%' // hex to base64 is ~33% savings
    };
  }

  // Integer optimization with base62
  if (canonicalInt !== null && isBeneficialInteger(str)) {
    const encoded = toBase62(canonicalInt);
    return {
      encoded: `n${encoded}`,
      method: 'number',
      original: str,
      savings: `${Math.round((1 - encoded.length / str.length) * 100)}%`,
    };
  }

  // Printable ASCII: stored verbatim behind '=' so it cannot be confused with
  // one of the prefixed encodings above.
  // TODO: a prefix table for common prefixes (user_, sess_, http://, ...) could go here.
  if (/^[\x20-\x7E]*$/.test(str)) {
    return { encoded: '=' + str, method: 'none' };
  }

  // Characters above U+00FF (emoji, CJK, ...): base64 of the UTF-8 bytes.
  const hasMultibyte = /[^\x00-\xFF]/.test(str);
  if (hasMultibyte) {
    return {
      encoded: 'b' + Buffer.from(str, 'utf8').toString('base64'),
      method: 'base64'
    };
  }

  // Remaining strings (Latin-1 / control characters): URL encoding.
  return {
    encoded: '%' + encodeURIComponent(str),
    method: 'url'
  };
}
/**
 * Decode base64 `content`, returning the bytes only when `content` is the
 * canonical (padded) base64 spelling of at least `minBytes` bytes; otherwise null.
 * Buffer's base64 decoding is lenient and never throws, so validity has to be
 * checked by re-encoding.
 */
function decodeStrictBase64(content, minBytes) {
  const bytes = Buffer.from(content, 'base64');
  if (bytes.length < minBytes || bytes.toString('base64') !== content) return null;
  return bytes;
}

/**
 * Decode an advanced-encoded value.
 *
 * The first character selects the strategy ('d' dictionary, 'i' ISO timestamp,
 * 'u' UUID, 'h' hex, 't'/'n' base62 number, 'b' base64, '%' URL encoding,
 * '=' verbatim ASCII). Input whose payload is not valid for its prefix is
 * returned unchanged.
 *
 * @param {*} value - Encoded string; falsy or non-string input is returned as is.
 * @returns {*} The decoded string, or `value` itself when it cannot be decoded.
 */
export function advancedDecode(value) {
  // Also covers the empty string, which is falsy.
  if (!value || typeof value !== 'string') return value;

  const prefix = value[0];
  const content = value.slice(1);

  // If no content after prefix, it's not encoded
  if (content.length === 0 && prefix !== 'd') {
    return value;
  }

  switch (prefix) {
    case 'd': { // Dictionary, with optional trailing 'U' (uppercase HTTP method)
      const isUpperCase = content.endsWith('U');
      const code = isUpperCase ? content.slice(0, -1) : content;
      // Own-property check: payloads such as "toString" must not resolve to
      // members inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(REVERSE_DICTIONARY, code)) return value;
      const word = REVERSE_DICTIONARY[code];
      return isUpperCase ? word.toUpperCase() : word;
    }

    case 'i': { // ISO timestamp
      const flag = content[0]; // 'm' = with millis, 's' = without
      // Any other flag means this is ordinary text that happens to start with 'i'.
      if (flag !== 'm' && flag !== 's') return value;
      try {
        const unixMs = fromBase62(content.slice(1)); // stored as milliseconds
        const iso = new Date(unixMs).toISOString(); // throws RangeError on an invalid date
        return flag === 's' && iso.endsWith('.000Z') ? iso.replace('.000', '') : iso;
      } catch {
        return value;
      }
    }

    case 'u': { // UUID: payload must be exactly 16 bytes
      const bytes = decodeStrictBase64(content, 16);
      if (bytes === null || bytes.length !== 16) return value;
      const hex = bytes.toString('hex');
      // Reconstruct UUID format
      return [
        hex.slice(0, 8),
        hex.slice(8, 12),
        hex.slice(12, 16),
        hex.slice(16, 20),
        hex.slice(20, 32)
      ].join('-');
    }

    case 'h': { // Hex string: hex detection requires >= 8 hex digits, i.e. >= 4 bytes
      const bytes = decodeStrictBase64(content, 4);
      return bytes === null ? value : bytes.toString('hex');
    }

    case 't': // Timestamp
    case 'n': // Number
      try {
        const num = fromBase62(content);
        // If decoding failed, return original
        if (Number.isNaN(Number(num))) return value;
        return String(num);
      } catch {
        return value;
      }

    case 'b': // Base64 of UTF-8 text
      try {
        return Buffer.from(content, 'base64').toString('utf8');
      } catch {
        return value;
      }

    case '%': // URL encoded
      try {
        return decodeURIComponent(content);
      } catch {
        // Malformed escape sequence: treat as not encoded.
        return value;
      }

    case '=': // Unencoded ASCII
      return content;

    default:
      // No known prefix - return as is
      return value;
  }
}
/**
 * Measure how advanced encoding changes the UTF-8 byte size of a value.
 *
 * Returns the byte length of String(value) (`original`), the byte length of
 * the encoded text (`encoded`), the strategy name (`method`), the rounded
 * percentage saved (`savings`, 0 for an empty original) and the
 * encoded/original ratio (`ratio`, 1 for an empty original).
 */
export function calculateAdvancedSize(value) {
  const { encoded: encodedText, method } = advancedEncode(value);
  const original = Buffer.byteLength(String(value), 'utf8');
  const encoded = Buffer.byteLength(encodedText, 'utf8');

  // An empty original would divide by zero, so report neutral figures instead.
  const hasContent = original > 0;
  const savings = hasContent ? Math.round((1 - encoded / original) * 100) : 0;
  const ratio = hasContent ? encoded / original : 1;

  return { original, encoded, method, savings, ratio };
}
/**
 * Encode the values of a metadata object.
 *
 * Strings are encoded, Dates are encoded from their ISO string, nested plain
 * objects are processed recursively, and string elements of arrays are
 * encoded. null/undefined and every other value are copied unchanged.
 * Falsy or non-object input is returned as is.
 */
export function encodeMetadata(metadata) {
  if (!metadata || typeof metadata !== 'object') return metadata;

  const encodeText = (text) => advancedEncode(text).encoded;

  const encodeField = (raw) => {
    if (raw === null || raw === undefined) return raw;
    if (Array.isArray(raw)) {
      // Only string elements are encoded; other elements pass through.
      return raw.map((item) => (typeof item === 'string' ? encodeText(item) : item));
    }
    if (raw instanceof Date) return encodeText(raw.toISOString());
    if (typeof raw === 'object') return encodeMetadata(raw);
    if (typeof raw === 'string') return encodeText(raw);
    return raw;
  };

  return Object.entries(metadata).reduce((result, [field, raw]) => {
    result[field] = encodeField(raw);
    return result;
  }, {});
}
/**
 * Decode the values of a metadata object.
 *
 * Strings are decoded, nested objects are processed recursively, and string
 * elements of arrays are decoded. null/undefined and every other value are
 * copied unchanged. Falsy or non-object input is returned as is.
 */
export function decodeMetadata(metadata) {
  if (!metadata || typeof metadata !== 'object') return metadata;

  const decodeField = (raw) => {
    if (raw === null || raw === undefined) return raw;
    if (Array.isArray(raw)) {
      // Only string elements are decoded; other elements pass through.
      return raw.map((item) => (typeof item === 'string' ? advancedDecode(item) : item));
    }
    switch (typeof raw) {
      case 'object':
        return decodeMetadata(raw);
      case 'string':
        return advancedDecode(raw);
      default:
        return raw;
    }
  };

  return Object.entries(metadata).reduce((result, [field, raw]) => {
    result[field] = decodeField(raw);
    return result;
  }, {});
}
/**
 * Encode every top-level value of `obj` and collect size statistics.
 *
 * Returns `{ optimized, stats }` where `optimized` maps each key to its
 * encoded text and `stats` holds the summed UTF-8 byte sizes before and after
 * encoding, a per-strategy usage count, and the overall rounded savings
 * percentage (0 when the originals total zero bytes).
 */
export function optimizeObjectValues(obj) {
  const optimized = {};
  const methods = {};
  let totalOriginal = 0;
  let totalOptimized = 0;

  Object.entries(obj).forEach(([key, value]) => {
    const { encoded, method } = advancedEncode(value);
    optimized[key] = encoded;
    totalOriginal += Buffer.byteLength(String(value), 'utf8');
    totalOptimized += Buffer.byteLength(encoded, 'utf8');
    methods[method] = (methods[method] || 0) + 1;
  });

  let savings = 0;
  if (totalOriginal > 0) {
    savings = Math.round((1 - totalOptimized / totalOriginal) * 100);
  }

  return {
    optimized,
    stats: { totalOriginal, totalOptimized, methods, savings },
  };
}
// Backwards compatibility exports: each public function is additionally
// exported under an "ultra"-prefixed alias. The aliases refer to the same
// function objects as the primary names, so behavior is identical.
export {
advancedEncode as ultraEncode,
advancedDecode as ultraDecode,
calculateAdvancedSize as calculateUltraSize,
optimizeObjectValues as ultraOptimizeObject
};