@andrejs1979/document
MongoDB-compatible NoSQL document database
670 lines (669 loc) • 27.8 kB
JavaScript
/**
* NoSQL - Document Storage Engine
* High-performance document storage with Cloudflare D1/R2 integration
*/
import { DocumentError, ValidationError, DuplicateKeyError } from '../types';
/**
* Document storage with caching and optimization
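*
* @example
* // Minimal construction sketch; `env.DB` / `env.KV` / `env.BUCKET` are
* // illustrative Workers binding names, not part of this package's API.
* const storage = new DocumentStorage({
*   name: 'app-db',
*   d1Database: env.DB,   // required; the constructor throws MISSING_D1 without it
*   kvStore: env.KV,      // optional
*   r2Bucket: env.BUCKET  // optional; documents > 1 MB are mirrored here
* });
* await storage.initialize();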
*/
export class DocumentStorage {
d1;
kvStore;
r2Bucket;
config;
documentCache = new Map();
queryCache = new Map();
initialized = false;
constructor(config) {
this.config = {
name: config.name,
d1Database: config.d1Database,
kvStore: config.kvStore,
r2Bucket: config.r2Bucket,
maxDocumentSize: config.maxDocumentSize || 16 * 1024 * 1024, // 16MB
queryTimeout: config.queryTimeout || 30000,
batchSize: config.batchSize || 100,
enableQueryCache: config.enableQueryCache ?? true,
queryCacheTTL: config.queryCacheTTL || 300, // 5 minutes
cacheSize: config.cacheSize || 100, // MB
enableAutoIndexing: config.enableAutoIndexing ?? true,
autoIndexThreshold: config.autoIndexThreshold || 1000,
maxIndexedFields: config.maxIndexedFields || 20,
vectorConfig: {
enabled: config.vectorConfig?.enabled ?? true,
defaultDimensions: config.vectorConfig?.defaultDimensions || 1536,
defaultModel: config.vectorConfig?.defaultModel || 'text-embedding-ada-002',
autoEmbedding: config.vectorConfig?.autoEmbedding ?? false,
embeddingFields: config.vectorConfig?.embeddingFields || ['content', 'text', 'description'],
...config.vectorConfig
},
enableValidation: config.enableValidation ?? true,
enableSchemaEvolution: config.enableSchemaEvolution ?? true,
enableChangeStreams: config.enableChangeStreams ?? true,
maxChangeStreamConnections: config.maxChangeStreamConnections || 1000,
enableQueryLogging: config.enableQueryLogging ?? false,
enablePerformanceMetrics: config.enablePerformanceMetrics ?? true,
enableRelationships: config.enableRelationships ?? true,
populateDepth: config.populateDepth || 3,
bulkWriteBatchSize: config.bulkWriteBatchSize || 1000,
bulkWriteParallelism: config.bulkWriteParallelism || 4
};
this.d1 = config.d1Database;
this.kvStore = config.kvStore;
this.r2Bucket = config.r2Bucket;
if (!this.d1) {
throw new DocumentError('D1 database instance is required', 'MISSING_D1');
}
}
/**
* Initialize storage with schema creation
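*
* Idempotent: every public method also runs this lazily through
* ensureInitialized(), so calling it up front is optional.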
*/
async initialize() {
if (this.initialized)
return;
try {
// Create core document table (prepare().run() rather than d1.exec(),
// since D1's exec() splits its input into statements on newlines)
await this.d1.prepare(`
CREATE TABLE IF NOT EXISTS documents (
_id TEXT PRIMARY KEY,
_collection TEXT NOT NULL,
_database TEXT NOT NULL,
_data TEXT NOT NULL, -- JSON document data
_searchText TEXT, -- Full-text search index
_vector BLOB, -- Vector embeddings
_vectorDims INTEGER, -- Vector dimensions
_metadata TEXT, -- JSON metadata
_version INTEGER DEFAULT 1,
_createdAt DATETIME DEFAULT CURRENT_TIMESTAMP,
_updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP,
_deleted BOOLEAN DEFAULT FALSE,
-- Dynamic indexed fields
_idx_field_1 TEXT,
_idx_field_2 TEXT,
_idx_field_3 TEXT,
_idx_field_4 TEXT,
_idx_field_5 TEXT,
_idx_field_6 REAL,
_idx_field_7 REAL,
_idx_field_8 REAL,
_idx_field_9 INTEGER,
_idx_field_10 INTEGER
)
`).run();
// Create core indexes
await this.createCoreIndexes();
// Create metadata tables
await this.createMetadataTables();
this.initialized = true;
console.log(`Document storage initialized for database: ${this.config.name}`);
}
catch (error) {
throw new DocumentError(`Storage initialization failed: ${error.message}`, 'INIT_ERROR');
}
}
/**
* Insert a single document
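*
* Generates an _id when absent, stamps _createdAt/_updatedAt and _version,
* and mirrors documents larger than 1 MB into R2 when a bucket is configured.
*
* @example
* // Illustrative call; collection name and fields are hypothetical.
* const res = await storage.insertOne('users', { name: 'Ada', email: 'ada@example.com' });
* console.log(res.insertedId); // generated ObjectId if none was supplied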
*/
async insertOne(collection, document) {
await this.ensureInitialized();
if (!document._id) {
document._id = this.generateObjectId();
}
// Validate document size
const docSize = JSON.stringify(document).length;
if (docSize > this.config.maxDocumentSize) {
throw new ValidationError(`Document size ${docSize} exceeds maximum ${this.config.maxDocumentSize}`);
}
try {
const now = new Date();
document._createdAt = now;
document._updatedAt = now;
document._version = 1;
// Prepare search text and metadata
const searchText = this.extractSearchText(document);
const metadata = this.extractMetadata(document);
// Prepare vector data if present
let vectorData = null;
let vectorDims = null;
if (document._vector) {
vectorData = new Uint8Array(document._vector.data.buffer);
vectorDims = document._vector.data.length;
}
// Extract dynamic index values
const indexValues = this.extractIndexValues(document);
// Insert into D1
const result = await this.d1.prepare(`
INSERT INTO documents (
_id, _collection, _database, _data, _searchText, _vector, _vectorDims, _metadata,
_version, _createdAt, _updatedAt, _deleted,
_idx_field_1, _idx_field_2, _idx_field_3, _idx_field_4, _idx_field_5,
_idx_field_6, _idx_field_7, _idx_field_8, _idx_field_9, _idx_field_10
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`).bind(document._id, collection, this.config.name, JSON.stringify(document), searchText, vectorData, vectorDims, JSON.stringify(metadata), document._version, now.toISOString(), now.toISOString(), false, ...indexValues).run();
if (!result.success) {
throw new DocumentError('Failed to insert document', 'INSERT_ERROR');
}
// Cache the document
this.cacheDocument(this.getDocumentKey(collection, document._id), document);
// Store large documents in R2 if available
if (docSize > 1024 * 1024 && this.r2Bucket) { // > 1MB
await this.storeInR2(collection, document._id, document);
}
return {
acknowledged: true,
insertedId: document._id
};
}
catch (error) {
if (error.message?.includes('UNIQUE constraint failed')) {
throw new DuplicateKeyError(`Document with _id '${document._id}' already exists`);
}
throw new DocumentError(`Insert failed: ${error.message}`, 'INSERT_ERROR');
}
}
/**
* Insert multiple documents
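*
* Writes in batches of bulkWriteBatchSize (default 1000) via d1.batch().
*
* @example
* // Illustrative call; the documents shown are hypothetical.
* const res = await storage.insertMany('events', [
*   { type: 'click', userId: 'u1' },
*   { type: 'view', userId: 'u2' }
* ]);
* console.log(res.insertedCount, res.insertedIds);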
*/
async insertMany(collection, documents) {
await this.ensureInitialized();
const insertedIds = [];
const batchSize = this.config.bulkWriteBatchSize;
try {
// Process in batches
for (let i = 0; i < documents.length; i += batchSize) {
const batch = documents.slice(i, i + batchSize);
// Prepare batch for transaction
const statements = batch.map(doc => {
if (!doc._id) {
doc._id = this.generateObjectId();
}
const now = new Date();
doc._createdAt = now;
doc._updatedAt = now;
doc._version = 1;
const searchText = this.extractSearchText(doc);
const metadata = this.extractMetadata(doc);
const indexValues = this.extractIndexValues(doc);
let vectorData = null;
let vectorDims = null;
if (doc._vector) {
vectorData = new Uint8Array(doc._vector.data.buffer);
vectorDims = doc._vector.data.length;
}
return {
stmt: this.d1.prepare(`
INSERT INTO documents (
_id, _collection, _database, _data, _searchText, _vector, _vectorDims, _metadata,
_version, _createdAt, _updatedAt, _deleted,
_idx_field_1, _idx_field_2, _idx_field_3, _idx_field_4, _idx_field_5,
_idx_field_6, _idx_field_7, _idx_field_8, _idx_field_9, _idx_field_10
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`),
bind: [
doc._id,
collection,
this.config.name,
JSON.stringify(doc),
searchText,
vectorData,
vectorDims,
JSON.stringify(metadata),
doc._version,
now.toISOString(),
now.toISOString(),
false,
...indexValues
]
};
});
// Execute batch transaction
const results = await this.d1.batch(statements.map(s => s.stmt.bind(...s.bind)));
// Collect successful insertions
for (let j = 0; j < results.length; j++) {
if (results[j].success) {
insertedIds.push(batch[j]._id);
// Cache the document
this.cacheDocument(this.getDocumentKey(collection, batch[j]._id), batch[j]);
}
}
}
return {
acknowledged: true,
insertedCount: insertedIds.length,
insertedIds
};
}
catch (error) {
throw new DocumentError(`Bulk insert failed: ${error.message}`, 'BULK_INSERT_ERROR');
}
}
/**
* Find documents by filter
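*
* Filters are translated to SQL over JSON_EXTRACT (see buildQuery); results
* are served from the query cache while enableQueryCache and the TTL allow.
*
* @example
* // Illustrative query; field names are hypothetical.
* const docs = await storage.find('users',
*   { status: 'active' },
*   { sort: { _createdAt: -1 }, limit: 20, skip: 0 });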
*/
async find(collection, filter, options = {}) {
await this.ensureInitialized();
try {
// Check query cache first
const cacheKey = this.getQueryCacheKey('find', collection, filter, options);
if (this.config.enableQueryCache && this.queryCache.has(cacheKey)) {
const cached = this.queryCache.get(cacheKey);
if (Date.now() - cached.timestamp < this.config.queryCacheTTL * 1000) {
return cached.result;
}
this.queryCache.delete(cacheKey);
}
// Build SQL query from MongoDB filter
const { sql, params } = this.buildQuery(collection, filter, options);
// Execute query
const result = await this.d1.prepare(sql).bind(...params).all();
// Parse results
const documents = result.results?.map((row) => {
const doc = JSON.parse(row._data);
// Add vector data if present
if (row._vector && row._vectorDims) {
doc._vector = {
id: doc._id,
data: new Float32Array(new Uint8Array(row._vector).buffer),
metadata: doc._vector?.metadata || {}
};
}
return doc;
}) || [];
// Cache the result
if (this.config.enableQueryCache) {
this.queryCache.set(cacheKey, {
result: documents,
timestamp: Date.now()
});
}
return documents;
}
catch (error) {
throw new DocumentError(`Find operation failed: ${error.message}`, 'FIND_ERROR');
}
}
/**
* Find a single document
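*
* @example
* // Same as find() with limit 1; resolves to null when nothing matches.
* const user = await storage.findOne('users', { email: 'ada@example.com' });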
*/
async findOne(collection, filter, options = {}) {
const results = await this.find(collection, filter, { ...options, limit: 1 });
return results.length > 0 ? results[0] : null;
}
/**
* Update a single document
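*
* Supports the $set, $unset and $inc operators as well as whole-document
* replacement, plus upsert via options.
*
* @example
* // Illustrative update; fields are hypothetical.
* const res = await storage.updateOne(
*   'users',
*   { email: 'ada@example.com' },
*   { $set: { status: 'active' }, $inc: { logins: 1 } },
*   { upsert: true }
* );
* console.log(res.matchedCount, res.upsertedId);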
*/
async updateOne(collection, filter, update, options = {}) {
await this.ensureInitialized();
try {
// Find the document first
const existingDoc = await this.findOne(collection, filter);
if (!existingDoc && !options.upsert) {
return {
acknowledged: true,
matchedCount: 0,
modifiedCount: 0,
upsertedCount: 0
};
}
let doc = existingDoc;
let upsertedId;
if (!doc && options.upsert) {
// Seed the upsert document from the filter (assumes a plain equality
// filter; $-operator keys are not stripped here)
doc = { ...filter, _id: this.generateObjectId() };
upsertedId = doc._id;
}
// Apply updates
const updatedDoc = this.applyUpdateOperators(doc, update);
updatedDoc._updatedAt = new Date();
updatedDoc._version = (updatedDoc._version || 0) + 1;
// Prepare for update
const searchText = this.extractSearchText(updatedDoc);
const metadata = this.extractMetadata(updatedDoc);
const indexValues = this.extractIndexValues(updatedDoc);
let vectorData = null;
let vectorDims = null;
if (updatedDoc._vector) {
vectorData = new Uint8Array(updatedDoc._vector.data.buffer);
vectorDims = updatedDoc._vector.data.length;
}
let result;
if (existingDoc) {
// Update existing document
result = await this.d1.prepare(`
UPDATE documents SET
_data = ?, _searchText = ?, _vector = ?, _vectorDims = ?, _metadata = ?,
_version = ?, _updatedAt = ?,
_idx_field_1 = ?, _idx_field_2 = ?, _idx_field_3 = ?, _idx_field_4 = ?, _idx_field_5 = ?,
_idx_field_6 = ?, _idx_field_7 = ?, _idx_field_8 = ?, _idx_field_9 = ?, _idx_field_10 = ?
WHERE _id = ? AND _collection = ? AND _database = ? AND _deleted = FALSE
`).bind(JSON.stringify(updatedDoc), searchText, vectorData, vectorDims, JSON.stringify(metadata), updatedDoc._version, updatedDoc._updatedAt.toISOString(), ...indexValues, updatedDoc._id, collection, this.config.name).run();
}
else {
// Insert new document (upsert)
result = await this.insertOne(collection, updatedDoc);
}
// Update cache
this.cacheDocument(this.getDocumentKey(collection, updatedDoc._id), updatedDoc);
return {
acknowledged: true,
matchedCount: existingDoc ? 1 : 0,
modifiedCount: existingDoc ? 1 : 0,
upsertedId,
upsertedCount: upsertedId ? 1 : 0
};
}
catch (error) {
throw new DocumentError(`Update operation failed: ${error.message}`, 'UPDATE_ERROR');
}
}
/**
* Delete documents
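*
* Soft delete: matching rows are flagged _deleted = TRUE rather than removed.
*
* @example
* // Illustrative call; filter fields are hypothetical.
* const res = await storage.deleteMany('sessions', { status: 'expired' });
* console.log(res.deletedCount);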
*/
async deleteMany(collection, filter) {
await this.ensureInitialized();
try {
// Build query to find matching documents
const { sql, params } = this.buildQuery(collection, filter, {});
// Soft delete (mark as deleted); the replaced prefix must match
// buildQuery's SELECT list exactly for the rewrite to take effect
const deleteSql = sql.replace('SELECT _data, _vector, _vectorDims FROM documents', 'UPDATE documents SET _deleted = TRUE, _updatedAt = ?');
const now = new Date().toISOString();
const result = await this.d1.prepare(deleteSql).bind(now, ...params).run();
// D1 reports affected-row counts under meta.changes
const deletedCount = result.meta?.changes ?? 0;
// Invalidate cached query results that may include the deleted documents
if (deletedCount > 0) {
this.clearQueryCache();
}
return {
acknowledged: true,
deletedCount
};
}
catch (error) {
throw new DocumentError(`Delete operation failed: ${error.message}`, 'DELETE_ERROR');
}
}
/**
* Delete a single document
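*
* @example
* // Resolves the first match, then soft-deletes it by _id.
* const res = await storage.deleteOne('users', { email: 'ada@example.com' });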
*/
async deleteOne(collection, filter) {
const doc = await this.findOne(collection, filter);
if (!doc) {
return { acknowledged: true, deletedCount: 0 };
}
const result = await this.deleteMany(collection, { _id: doc._id });
return {
acknowledged: true,
deletedCount: Math.min(result.deletedCount, 1)
};
}
/**
* Count documents
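*
* @example
* const active = await storage.countDocuments('users', { status: 'active' });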
*/
async countDocuments(collection, filter = {}) {
await this.ensureInitialized();
try {
const { sql, params } = this.buildQuery(collection, filter, {});
const countSql = sql.replace('SELECT _data, _vector, _vectorDims FROM documents', 'SELECT COUNT(*) as count FROM documents');
const result = await this.d1.prepare(countSql).bind(...params).first();
return result?.count || 0;
}
catch (error) {
throw new DocumentError(`Count operation failed: ${error.message}`, 'COUNT_ERROR');
}
}
// ===============================
// Private Methods
// ===============================
async ensureInitialized() {
if (!this.initialized) {
await this.initialize();
}
}
generateObjectId() {
// Simple 24-hex-char ObjectId (8-char seconds timestamp + 16 random chars);
// padding guards against Math.random() yielding short hex strings.
// In production use a proper ObjectId library.
const timestamp = Math.floor(Date.now() / 1000).toString(16).padStart(8, '0');
const random = Math.random().toString(16).substring(2, 10).padEnd(8, '0');
const counter = Math.random().toString(16).substring(2, 8).padEnd(6, '0');
return timestamp + random + counter;
}
extractSearchText(document) {
// Extract searchable text from document
const searchableFields = ['title', 'content', 'description', 'text', 'name'];
const texts = [];
const extractText = (obj, depth = 0) => {
if (depth > 3)
return; // Prevent infinite recursion
for (const [key, value] of Object.entries(obj)) {
if (typeof value === 'string' && (searchableFields.includes(key) || key.includes('text'))) {
texts.push(value);
}
else if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
extractText(value, depth + 1);
}
}
};
extractText(document);
return texts.join(' ').substring(0, 10000); // Limit to 10KB
}
extractMetadata(document) {
return {
tags: document.tags || [],
category: document.category,
source: document.source,
confidence: document.confidence,
lastProcessed: new Date(),
processingVersion: '1.0.0',
customFields: {}
};
}
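// Maps well-known fields onto the fixed _idx_field_* columns so equality
// lookups can be served by real SQLite indexes. Slot layout, per the table
// schema above: slots 1-5 TEXT, 6-8 REAL, 9-10 INTEGER.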
extractIndexValues(document) {
// Extract up to 10 indexable values from the document
const values = new Array(10).fill(null);
// Common indexable fields
const stringFields = ['status', 'type', 'category', 'userId', 'email'];
const numberFields = ['score', 'rating', 'price', 'quantity', 'age'];
const integerFields = ['views', 'likes', 'comments', 'shares', 'count'];
let idx = 0;
// String fields (fields 1-5)
for (const field of stringFields) {
if (idx >= 5)
break;
if (document[field] && typeof document[field] === 'string') {
values[idx] = document[field];
}
idx++;
}
// Number fields (fields 6-8); only the first three listed fields fit the available slots
idx = 5;
for (const field of numberFields) {
if (idx >= 8)
break;
if (typeof document[field] === 'number') { // typeof check keeps legitimate 0 values
values[idx] = document[field];
}
idx++;
}
// Integer fields (fields 9-10); only the first two listed fields fit
idx = 8;
for (const field of integerFields) {
if (idx >= 10)
break;
if (Number.isInteger(document[field])) { // Number.isInteger keeps legitimate 0 values
values[idx] = document[field];
}
idx++;
}
return values;
}
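// Translates a MongoDB-style filter plus options into parameterized SQL.
// Illustrative output for { status: 'active' } (placeholders bound in order):
//   SELECT _data, _vector, _vectorDims FROM documents
//   WHERE _collection = ? AND _database = ? AND _deleted = FALSE
//     AND JSON_EXTRACT(_data, '$.status') = ?
// Field names are interpolated into the SQL text, so they must come from
// trusted code, never from raw user input.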
buildQuery(collection, filter, options) {
const params = [];
const conditions = [];
// Base conditions
conditions.push('_collection = ?');
params.push(collection);
conditions.push('_database = ?');
params.push(this.config.name);
conditions.push('_deleted = FALSE');
// Build filter conditions
this.buildFilterConditions(filter, conditions, params);
// Build SQL
let sql = `SELECT _data, _vector, _vectorDims FROM documents WHERE ${conditions.join(' AND ')}`;
// Add sorting
if (options.sort) {
const sortClauses = Object.entries(options.sort).map(([field, direction]) => {
return `JSON_EXTRACT(_data, '$.${field}') ${direction === 1 ? 'ASC' : 'DESC'}`;
});
sql += ` ORDER BY ${sortClauses.join(', ')}`;
}
// Add limit and offset
if (options.limit) {
sql += ` LIMIT ${options.limit}`;
}
if (options.skip) {
sql += ` OFFSET ${options.skip}`;
}
return { sql, params };
}
buildFilterConditions(filter, conditions, params) {
for (const [key, value] of Object.entries(filter)) {
if (key.startsWith('$')) {
// Handle MongoDB operators
this.handleOperator(key, value, conditions, params);
}
else {
// Simple equality
conditions.push(`JSON_EXTRACT(_data, '$.${key}') = ?`);
params.push(value);
}
}
}
handleOperator(operator, value, conditions, params) {
switch (operator) {
case '$and': {
const andConditions = [];
for (const subFilter of value) {
const subConds = [];
this.buildFilterConditions(subFilter, subConds, params);
andConditions.push(`(${subConds.join(' AND ')})`);
}
conditions.push(`(${andConditions.join(' AND ')})`);
break;
}
case '$or': {
const orConditions = [];
for (const subFilter of value) {
const subConds = [];
this.buildFilterConditions(subFilter, subConds, params);
orConditions.push(`(${subConds.join(' AND ')})`);
}
conditions.push(`(${orConditions.join(' OR ')})`);
break;
}
// Add more operators ($in, $gt, $lt, ...) as needed
}
}
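// Applies MongoDB update operators to a shallow copy of the document, e.g.:
//   applyUpdateOperators({ a: 1, b: 2 }, { $set: { a: 9 }, $inc: { b: 1 } })
//   // => { a: 9, b: 3 }
// An update without $-prefixed keys is treated as a full replacement.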
applyUpdateOperators(document, update) {
const result = { ...document };
if (this.isUpdateOperators(update)) {
// Handle update operators
if (update.$set) {
Object.assign(result, update.$set);
}
if (update.$unset) {
for (const field of Object.keys(update.$unset)) {
delete result[field];
}
}
if (update.$inc) {
for (const [field, increment] of Object.entries(update.$inc)) {
result[field] = (result[field] || 0) + increment;
}
}
// Add more operators as needed
}
else {
// Replacement document: swap the contents entirely (MongoDB semantics),
// preserving the immutable _id and the original _createdAt
return { ...update, _id: document._id, _createdAt: document._createdAt };
}
return result;
}
isUpdateOperators(update) {
return Object.keys(update).some(key => key.startsWith('$'));
}
async createCoreIndexes() {
const indexes = [
'CREATE INDEX IF NOT EXISTS idx_collection ON documents(_collection)',
'CREATE INDEX IF NOT EXISTS idx_database ON documents(_database)',
'CREATE INDEX IF NOT EXISTS idx_created_at ON documents(_createdAt)',
'CREATE INDEX IF NOT EXISTS idx_updated_at ON documents(_updatedAt)',
'CREATE INDEX IF NOT EXISTS idx_deleted ON documents(_deleted)',
'CREATE INDEX IF NOT EXISTS idx_collection_db ON documents(_collection, _database)',
// Dynamic field indexes
'CREATE INDEX IF NOT EXISTS idx_field_1 ON documents(_idx_field_1)',
'CREATE INDEX IF NOT EXISTS idx_field_2 ON documents(_idx_field_2)',
'CREATE INDEX IF NOT EXISTS idx_field_3 ON documents(_idx_field_3)',
'CREATE INDEX IF NOT EXISTS idx_field_4 ON documents(_idx_field_4)',
'CREATE INDEX IF NOT EXISTS idx_field_5 ON documents(_idx_field_5)',
'CREATE INDEX IF NOT EXISTS idx_field_6 ON documents(_idx_field_6)',
'CREATE INDEX IF NOT EXISTS idx_field_7 ON documents(_idx_field_7)',
'CREATE INDEX IF NOT EXISTS idx_field_8 ON documents(_idx_field_8)',
'CREATE INDEX IF NOT EXISTS idx_field_9 ON documents(_idx_field_9)',
'CREATE INDEX IF NOT EXISTS idx_field_10 ON documents(_idx_field_10)'
];
for (const indexSql of indexes) {
await this.d1.exec(indexSql);
}
}
async createMetadataTables() {
// Collection metadata (prepare().run(), since d1.exec() splits on newlines)
await this.d1.prepare(`
CREATE TABLE IF NOT EXISTS collection_metadata (
database_name TEXT,
collection_name TEXT,
document_count INTEGER DEFAULT 0,
total_size INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (database_name, collection_name)
)
`).run();
// Index mappings
await this.d1.prepare(`
CREATE TABLE IF NOT EXISTS index_mappings (
database_name TEXT,
collection_name TEXT,
field_path TEXT,
index_column TEXT,
data_type TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (database_name, collection_name, field_path)
)
`).run();
}
cacheDocument(key, document) {
if (this.documentCache.size >= 10000) { // Max cached documents
// Evict the oldest insertion (FIFO approximation of LRU; Map preserves insertion order)
const firstKey = this.documentCache.keys().next().value;
this.documentCache.delete(firstKey);
}
this.documentCache.set(key, document);
}
getDocumentKey(collection, id) {
return `${this.config.name}:${collection}:${id}`;
}
getQueryCacheKey(operation, collection, filter, options) {
return `${operation}:${collection}:${JSON.stringify(filter)}:${JSON.stringify(options)}`;
}
clearQueryCache() {
this.queryCache.clear();
}
async storeInR2(collection, id, document) {
if (!this.r2Bucket)
return;
try {
const key = `documents/${this.config.name}/${collection}/${id}.json`;
await this.r2Bucket.put(key, JSON.stringify(document));
}
catch (error) {
console.warn('Failed to store document in R2:', error);
}
}
}
//# sourceMappingURL=document-storage.js.map