@andrejs1979/document
Version:
MongoDB-compatible document database for NoSQL
598 lines • 25 kB
JavaScript
/**
* NoSQL - Document Index Manager
* Advanced indexing system for fast document queries
*/
import { DocumentError } from '../types';
/**
 * Smart index manager with auto-indexing capabilities.
 *
 * Physical indexes are created on the shared `documents` table through the
 * configured D1 handle (`config.d1Database`); one row per index is kept in the
 * `index_metadata` table. Query shapes reported through `trackQueryPattern`
 * are counted in memory only and drive the index recommendations.
 */
export class DocumentIndexManager {
  d1;
  kvStore; // stored from config; not read anywhere in this class
  config;
  indexCache = new Map(); // "<db>.<collection>.<indexName>" -> index info
  queryPatterns = new Map(); // "<db>.<collection>:<sorted,fields>" -> hit count
  fieldUsage = new Map(); // "<db>.<collection>.<field>" -> { count, lastUsed }

  /**
   * @param {object} config - Manager configuration. Reads `d1Database`,
   *   `kvStore`, `enableAutoIndexing`, `autoIndexThreshold` and
   *   `vectorConfig.defaultDimensions`.
   */
  constructor(config) {
    this.config = config;
    this.d1 = config.d1Database;
    this.kvStore = config.kvStore;
  }

  /**
   * Create a new index, record its metadata and cache its description.
   *
   * @param {string} database
   * @param {string} collection
   * @param {{key: object, options?: object}} indexSpec - Field -> direction/type map plus options.
   * @returns {Promise<void>}
   * @throws {DocumentError} `INDEX_CREATION_ERROR` for every failure, including
   *   an already existing index (the inner error message is preserved).
   */
  async createIndex(database, collection, indexSpec) {
    try {
      const indexName = this.generateIndexName(database, collection, indexSpec);
      if (await this.indexExists(database, collection, indexName)) {
        throw new DocumentError(`Index ${indexName} already exists`, 'INDEX_EXISTS');
      }
      await this.createIndexByType(database, collection, indexName, indexSpec);
      try {
        await this.storeIndexMetadata(database, collection, indexName, indexSpec);
      }
      catch (metadataError) {
        // Without a metadata row the physical index could never be dropped or
        // recreated through this class, so undo it (best effort) before failing.
        try {
          await this.dropIndexByType(database, collection, { name: indexName, key: indexSpec.key });
        }
        catch (rollbackError) {
          console.warn(`Failed to roll back index ${indexName}:`, rollbackError.message);
        }
        throw metadataError;
      }
      this.cacheIndexInfo(database, collection, indexName, indexSpec);
      console.log(`Created index ${indexName} for ${database}.${collection}`);
    }
    catch (error) {
      throw new DocumentError(`Failed to create index: ${error.message}`, 'INDEX_CREATION_ERROR');
    }
  }

  /**
   * Drop an existing index together with its metadata row and cache entry.
   *
   * @param {string} database
   * @param {string} collection
   * @param {string} indexName
   * @returns {Promise<void>}
   * @throws {DocumentError} `INDEX_DROP_ERROR` for every failure, including an unknown index.
   */
  async dropIndex(database, collection, indexName) {
    try {
      const indexInfo = await this.getIndexInfo(database, collection, indexName);
      if (!indexInfo) {
        throw new DocumentError(`Index ${indexName} does not exist`, 'INDEX_NOT_FOUND');
      }
      await this.dropIndexByType(database, collection, indexInfo);
      await this.removeIndexMetadata(database, collection, indexName);
      this.indexCache.delete(this.getIndexCacheKey(database, collection, indexName));
      console.log(`Dropped index ${indexName} for ${database}.${collection}`);
    }
    catch (error) {
      throw new DocumentError(`Failed to drop index: ${error.message}`, 'INDEX_DROP_ERROR');
    }
  }

  /**
   * List all indexes recorded for a collection, oldest first.
   *
   * @param {string} database
   * @param {string} collection
   * @returns {Promise<Array<object>>} Index descriptions (`name`, `key`, `options`, `size`, `stats`).
   * @throws {DocumentError} `INDEX_LIST_ERROR` when the query or spec parsing fails.
   */
  async listIndexes(database, collection) {
    try {
      const result = await this.d1.prepare(`
        SELECT index_name, index_spec, created_at, size_bytes, usage_count
        FROM index_metadata
        WHERE database_name = ? AND collection_name = ?
        ORDER BY created_at ASC
      `).bind(database, collection).all();
      return (result.results || []).map((row) => this.buildIndexInfo(row.index_name, row));
    }
    catch (error) {
      throw new DocumentError(`Failed to list indexes: ${error.message}`, 'INDEX_LIST_ERROR');
    }
  }

  /**
   * Get index recommendations based on the tracked query patterns.
   *
   * @param {string} database
   * @param {string} collection
   * @returns {Promise<{recommended: Array<object>, reasons: string[], priority: string}>}
   *   `recommended[i]` is justified by `reasons[i]`; `priority` is
   *   `'low' | 'medium' | 'high'`, derived from how often this collection was queried.
   */
  async getIndexRecommendations(database, collection) {
    const recommendations = [];
    const reasons = [];
    const patternAnalysis = await this.analyzeQueryPatterns(database, collection);
    const frequentFields = this.getFrequentlyUsedFields(database, collection);
    // One metadata query serves every candidate check below.
    const existingIndexes = await this.listIndexes(database, collection);
    const recommend = async (indexSpec, reason) => {
      if (!await this.hasEquivalentIndex(database, collection, indexSpec, existingIndexes)) {
        recommendations.push(indexSpec);
        reasons.push(reason);
      }
    };

    // Single-field indexes for frequently queried fields.
    const fieldThreshold = this.config.autoIndexThreshold || 100;
    for (const field of frequentFields) {
      if (field.usage > fieldThreshold) {
        await recommend({ key: { [field.name]: 1 } }, `Field '${field.name}' is frequently queried (${field.usage} times)`);
      }
    }

    // Compound indexes for common multi-field query patterns.
    for (const pattern of patternAnalysis) {
      if (pattern.frequency > 50 && pattern.fields.length > 1) {
        const key = Object.fromEntries(pattern.fields.map((field) => [field, 1]));
        await recommend({ key }, `Compound index for common query pattern: ${pattern.fields.join(', ')}`);
      }
    }

    // Text index over the well-known text fields.
    const textSearchFields = this.getTextSearchFields(database, collection);
    if (textSearchFields.length > 0) {
      await recommend({
        key: Object.fromEntries(textSearchFields.map((field) => [field, 'text'])),
        options: { name: `text_index_${collection}` }
      }, `Text index for full-text search on: ${textSearchFields.join(', ')}`);
    }

    // Vector index when queries on the `_vector` field were observed.
    if (await this.hasVectorOperations(database, collection)) {
      await recommend({
        key: { '_vector': 'vector' },
        options: {
          name: `vector_index_${collection}`,
          vectorOptions: {
            dimensions: this.config.vectorConfig?.defaultDimensions || 1536,
            similarity: 'cosine',
            type: 'hnsw'
          }
        }
      }, 'Vector index for similarity search operations');
    }

    // Priority reflects the traffic of this collection only; counting every
    // tracked collection would let a busy neighbour inflate it.
    let priority = 'low';
    if (recommendations.length > 0) {
      const totalQueryCount = this.countTrackedQueries(database, collection);
      if (totalQueryCount > 1000) {
        priority = 'high';
      }
      else if (totalQueryCount > 100) {
        priority = 'medium';
      }
    }
    return { recommended: recommendations, reasons, priority };
  }

  /**
   * Create every currently recommended index. No-op unless
   * `config.enableAutoIndexing` is set. Failures are logged, never thrown.
   *
   * @param {string} database
   * @param {string} collection
   * @returns {Promise<void>}
   */
  async autoCreateIndexes(database, collection) {
    if (!this.config.enableAutoIndexing) {
      return;
    }
    try {
      const recommendations = await this.getIndexRecommendations(database, collection);
      for (const indexSpec of recommendations.recommended) {
        try {
          await this.createIndex(database, collection, indexSpec);
          console.log(`Auto-created index for ${database}.${collection}:`, indexSpec);
        }
        catch (error) {
          console.warn(`Failed to auto-create index:`, error.message);
        }
      }
    }
    catch (error) {
      console.warn(`Auto-indexing failed for ${database}.${collection}:`, error.message);
    }
  }

  /**
   * Record one query so its field combination and individual fields count
   * towards auto-indexing. No-op unless `config.enableAutoIndexing` is set.
   *
   * @param {string} database
   * @param {string} collection
   * @param {object} filter - Query filter whose field names are extracted.
   * @param {object} [options] - Accepted for interface compatibility; currently unused.
   */
  trackQueryPattern(database, collection, filter, options = {}) {
    if (!this.config.enableAutoIndexing) {
      return;
    }
    // Sorted so that {a, b} and {b, a} count as the same pattern.
    const fields = this.extractQueryFields(filter).sort();
    const pattern = `${database}.${collection}:${fields.join(',')}`;
    const patternCount = (this.queryPatterns.get(pattern) || 0) + 1;
    this.queryPatterns.set(pattern, patternCount);

    for (const field of fields) {
      const usageKey = `${database}.${collection}.${field}`;
      const previous = this.fieldUsage.get(usageKey);
      this.fieldUsage.set(usageKey, {
        count: (previous ? previous.count : 0) + 1,
        lastUsed: new Date()
      });
    }

    // Every `autoIndexThreshold` hits of the same pattern, re-evaluate indexes
    // in the background; the caller is never blocked on index creation.
    if (patternCount % (this.config.autoIndexThreshold || 100) === 0) {
      this.autoCreateIndexes(database, collection).catch(console.error);
    }
  }

  /**
   * Pick the existing index that best matches a query.
   *
   * @param {string} database
   * @param {string} collection
   * @param {object} filter
   * @param {object} [sort] - Sort specification; only its keys are used.
   * @returns {Promise<object|null>} Best index description, or `null` when no
   *   index scores above zero.
   */
  async getOptimalIndex(database, collection, filter, sort) {
    const indexes = await this.listIndexes(database, collection);
    const queryFields = this.extractQueryFields(filter);
    const sortFields = sort ? Object.keys(sort) : [];
    let bestIndex = null;
    let bestScore = 0;
    for (const index of indexes) {
      const score = this.calculateIndexScore(index, queryFields, sortFields);
      if (score > bestScore) {
        bestScore = score;
        bestIndex = index;
      }
    }
    return bestIndex;
  }

  /**
   * Rebuild an index by dropping and recreating it from its stored spec.
   * The recreated metadata row starts with fresh statistics.
   *
   * @param {string} database
   * @param {string} collection
   * @param {string} indexName
   * @returns {Promise<void>}
   * @throws {DocumentError} `INDEX_REBUILD_ERROR` for every failure. If the
   *   drop succeeds and the create fails, the index is left dropped.
   */
  async rebuildIndex(database, collection, indexName) {
    try {
      const indexInfo = await this.getIndexInfo(database, collection, indexName);
      if (!indexInfo) {
        throw new DocumentError(`Index ${indexName} not found`, 'INDEX_NOT_FOUND');
      }
      const indexSpec = {
        key: indexInfo.key,
        options: indexInfo.options
      };
      await this.dropIndex(database, collection, indexName);
      await this.createIndex(database, collection, indexSpec);
      console.log(`Rebuilt index ${indexName} for ${database}.${collection}`);
    }
    catch (error) {
      throw new DocumentError(`Failed to rebuild index: ${error.message}`, 'INDEX_REBUILD_ERROR');
    }
  }

  /**
   * Summarise index count, size and usage for a collection and add textual advice.
   *
   * @param {string} database
   * @param {string} collection
   * @returns {Promise<{totalIndexes: number, totalSize: number, usageStats: object, recommendations: string[]}>}
   */
  async getIndexStats(database, collection) {
    const indexes = await this.listIndexes(database, collection);
    const stats = {
      totalIndexes: indexes.length,
      totalSize: indexes.reduce((sum, idx) => sum + idx.size, 0),
      usageStats: Object.fromEntries(indexes.map((idx) => [idx.name, idx.stats?.usage || 0])),
      recommendations: []
    };
    // Indexes used fewer than 10 times are reported as drop candidates.
    const unusedIndexes = indexes.filter((idx) => (idx.stats?.usage || 0) < 10);
    if (unusedIndexes.length > 0) {
      stats.recommendations.push(`Consider dropping unused indexes: ${unusedIndexes.map(idx => idx.name).join(', ')}`);
    }
    const recommendations = await this.getIndexRecommendations(database, collection);
    if (recommendations.recommended.length > 0) {
      stats.recommendations.push(`Consider creating ${recommendations.recommended.length} recommended indexes`);
    }
    return stats;
  }

  // ===============================
  // Private Methods
  // ===============================

  /** Dispatch to the builder matching the index types found in `indexSpec.key`. */
  async createIndexByType(database, collection, indexName, indexSpec) {
    const indexTypes = Object.values(indexSpec.key);
    if (indexTypes.includes('text')) {
      await this.createTextIndex(database, collection, indexName, indexSpec);
    }
    else if (indexTypes.includes('vector')) {
      await this.createVectorIndex(database, collection, indexName, indexSpec);
    }
    else if (indexTypes.includes('2dsphere') || indexTypes.includes('2d')) {
      await this.createGeospatialIndex(database, collection, indexName, indexSpec);
    }
    else {
      await this.createBTreeIndex(database, collection, indexName, indexSpec);
    }
  }

  /**
   * Create an ordinary single-field or compound index. A direction of `-1`
   * maps to DESC, anything else to ASC. Mapped fields use their dedicated
   * column, all others a JSON_EXTRACT expression on `_data`.
   */
  async createBTreeIndex(database, collection, indexName, indexSpec) {
    const columns = Object.entries(indexSpec.key).map(([field, direction]) => {
      const order = direction === -1 ? 'DESC' : 'ASC';
      const target = this.getIndexColumn(field) ?? this.jsonExtractExpression(field);
      return `${target} ${order}`;
    });
    await this.execCreateIndex(indexName, columns.join(', '), this.collectionScope(database, collection));
  }

  /** Create the text index as a plain index over the `_searchText` column. */
  async createTextIndex(database, collection, indexName, indexSpec) {
    await this.execCreateIndex(indexName, '_searchText', this.collectionScope(database, collection, '_searchText IS NOT NULL'));
  }

  /** Create the vector index as a plain index over the `_vector` column. */
  async createVectorIndex(database, collection, indexName, indexSpec) {
    await this.execCreateIndex(indexName, '_vector', this.collectionScope(database, collection, '_vector IS NOT NULL'));
  }

  /**
   * Simplified geospatial indexing: two indexes per field, one on `<field>.lat`
   * and one on `<field>.lng`, named `<indexName>_<field>_lat|lng`.
   */
  async createGeospatialIndex(database, collection, indexName, indexSpec) {
    const scope = this.collectionScope(database, collection);
    for (const field of Object.keys(indexSpec.key)) {
      await this.execCreateIndex(`${indexName}_${field}_lat`, this.jsonExtractExpression(`${field}.lat`), scope);
      await this.execCreateIndex(`${indexName}_${field}_lng`, this.jsonExtractExpression(`${field}.lng`), scope);
    }
  }

  /** Drop the physical index and, for geospatial specs, its per-field lat/lng indexes. */
  async dropIndexByType(database, collection, indexInfo) {
    await this.execDropIndex(indexInfo.name);
    const indexTypes = Object.values(indexInfo.key);
    if (indexTypes.includes('2dsphere') || indexTypes.includes('2d')) {
      for (const field of Object.keys(indexInfo.key)) {
        await this.execDropIndex(`${indexInfo.name}_${field}_lat`);
        await this.execDropIndex(`${indexInfo.name}_${field}_lng`);
      }
    }
  }

  /** Quote a value as an SQL identifier (embedded double quotes are doubled). */
  quoteIdentifier(identifier) {
    return `"${String(identifier).replace(/"/g, '""')}"`;
  }

  /** Quote a value as an SQL string literal (embedded single quotes are doubled). */
  quoteLiteral(value) {
    return `'${String(value).replace(/'/g, "''")}'`;
  }

  /** SQL expression reading `path` out of the `_data` JSON column. */
  jsonExtractExpression(path) {
    return `JSON_EXTRACT(_data, ${this.quoteLiteral(`$.${path}`)})`;
  }

  /**
   * WHERE clause restricting a partial index to the live documents of one
   * collection. DDL cannot take bound parameters, so the values are escaped
   * as string literals instead.
   */
  collectionScope(database, collection, extraCondition) {
    const conditions = [
      `_database = ${this.quoteLiteral(database)}`,
      `_collection = ${this.quoteLiteral(collection)}`,
      '_deleted = FALSE'
    ];
    if (extraCondition) {
      conditions.push(extraCondition);
    }
    return conditions.join(' AND ');
  }

  /**
   * Run one CREATE INDEX statement on the `documents` table.
   * The statement is emitted on a single line because D1's `exec()` is
   * documented as taking newline-separated statements.
   * NOTE(review): confirm against the D1 runtime version in use.
   */
  async execCreateIndex(indexName, columns, whereClause) {
    await this.d1.exec(`CREATE INDEX ${this.quoteIdentifier(indexName)} ON documents(${columns}) WHERE ${whereClause}`);
  }

  /** Run one DROP INDEX IF EXISTS statement. */
  async execDropIndex(indexName) {
    await this.d1.exec(`DROP INDEX IF EXISTS ${this.quoteIdentifier(indexName)}`);
  }

  /**
   * Return `indexSpec.options.name` when given, otherwise derive
   * `idx_<db>_<collection>_<fields>_<types>` with every character outside
   * `[a-zA-Z0-9_]` replaced by `_`.
   */
  generateIndexName(database, collection, indexSpec) {
    if (indexSpec.options?.name) {
      return indexSpec.options.name;
    }
    const fields = Object.keys(indexSpec.key).join('_');
    const types = Object.values(indexSpec.key).join('_');
    return `idx_${database}_${collection}_${fields}_${types}`.replace(/[^a-zA-Z0-9_]/g, '_');
  }

  /** Whether a metadata row exists for the named index. */
  async indexExists(database, collection, indexName) {
    const result = await this.d1.prepare(`
      SELECT 1 FROM index_metadata
      WHERE database_name = ? AND collection_name = ? AND index_name = ?
    `).bind(database, collection, indexName).first();
    return !!result;
  }

  /** Insert the metadata row for a new index with zeroed size and usage. */
  async storeIndexMetadata(database, collection, indexName, indexSpec) {
    await this.d1.prepare(`
      INSERT INTO index_metadata (
        database_name, collection_name, index_name, index_spec, created_at, size_bytes, usage_count
      ) VALUES (?, ?, ?, ?, ?, ?, ?)
    `).bind(database, collection, indexName, JSON.stringify(indexSpec), new Date().toISOString(), 0, 0).run();
  }

  /** Delete the metadata row of an index. */
  async removeIndexMetadata(database, collection, indexName) {
    await this.d1.prepare(`
      DELETE FROM index_metadata
      WHERE database_name = ? AND collection_name = ? AND index_name = ?
    `).bind(database, collection, indexName).run();
  }

  /**
   * Look up one index description, served from the in-memory cache when
   * present and cached after a successful metadata read.
   *
   * @returns {Promise<object|null>} `null` when no metadata row exists.
   */
  async getIndexInfo(database, collection, indexName) {
    const cacheKey = this.getIndexCacheKey(database, collection, indexName);
    if (this.indexCache.has(cacheKey)) {
      return this.indexCache.get(cacheKey);
    }
    const row = await this.d1.prepare(`
      SELECT index_spec, created_at, size_bytes, usage_count
      FROM index_metadata
      WHERE database_name = ? AND collection_name = ? AND index_name = ?
    `).bind(database, collection, indexName).first();
    if (!row) {
      return null;
    }
    const indexInfo = this.buildIndexInfo(indexName, row);
    this.indexCache.set(cacheKey, indexInfo);
    return indexInfo;
  }

  /**
   * Convert an `index_metadata` row into an index description.
   * `lastAccessed` is filled from `created_at` because no access timestamp is
   * selected by the queries in this class.
   */
  buildIndexInfo(indexName, row) {
    const spec = JSON.parse(row.index_spec);
    const usage = row.usage_count || 0;
    return {
      name: indexName,
      key: spec.key,
      options: spec.options || {},
      size: row.size_bytes || 0,
      stats: {
        accesses: usage,
        lastAccessed: new Date(row.created_at),
        usage
      }
    };
  }

  /** Cache the description of a freshly created index with zeroed statistics. */
  cacheIndexInfo(database, collection, indexName, indexSpec) {
    this.indexCache.set(this.getIndexCacheKey(database, collection, indexName), {
      name: indexName,
      key: indexSpec.key,
      options: indexSpec.options || {},
      size: 0,
      stats: {
        accesses: 0,
        lastAccessed: new Date(),
        usage: 0
      }
    });
  }

  /** Cache key of an index: `<database>.<collection>.<indexName>`. */
  getIndexCacheKey(database, collection, indexName) {
    return `${database}.${collection}.${indexName}`;
  }

  /**
   * Collect the distinct field paths referenced by a query filter.
   *
   * Keys starting with `$` are operators: they contribute no path segment and
   * their object operands (or the object items of an array operand) are
   * searched under the current path. Any other key is a field; nested plain
   * objects extend the dotted path.
   *
   * @param {object|null|undefined} filter
   * @returns {string[]} Field paths in first-seen order; `[]` for a missing filter.
   */
  extractQueryFields(filter) {
    const fields = new Set();
    // `typeof null === 'object'`, and arrays of scalars/arrays carry no field
    // names, so both must be excluded before calling Object.entries().
    const isFieldContainer = (value) => typeof value === 'object' && value !== null && !Array.isArray(value);
    const visit = (node, path) => {
      for (const [key, value] of Object.entries(node)) {
        if (key.startsWith('$')) {
          const operands = Array.isArray(value) ? value : [value];
          for (const operand of operands) {
            if (isFieldContainer(operand)) {
              visit(operand, path);
            }
          }
        }
        else {
          const fieldPath = path ? `${path}.${key}` : key;
          fields.add(fieldPath);
          if (isFieldContainer(value)) {
            visit(value, fieldPath);
          }
        }
      }
    };
    if (typeof filter === 'object' && filter !== null) {
      visit(filter, '');
    }
    return [...fields];
  }

  /**
   * Map a well-known field name to its dedicated indexed column.
   *
   * @param {string} field
   * @returns {string|null} Column name, or `null` when the field has none.
   */
  getIndexColumn(field) {
    const fieldMapping = {
      'status': '_idx_field_1',
      'type': '_idx_field_2',
      'category': '_idx_field_3',
      'userId': '_idx_field_4',
      'email': '_idx_field_5',
      'score': '_idx_field_6',
      'rating': '_idx_field_7',
      'price': '_idx_field_8',
      'views': '_idx_field_9',
      'likes': '_idx_field_10'
    };
    // Own-property check: a field called `constructor` or `toString` must not
    // resolve to something inherited from Object.prototype.
    return Object.hasOwn(fieldMapping, field) ? fieldMapping[field] : null;
  }

  /**
   * Tracked field combinations of a collection, most frequent first.
   * Patterns without any field (empty filters) are omitted.
   *
   * @returns {Promise<Array<{fields: string[], frequency: number}>>}
   */
  async analyzeQueryPatterns(database, collection) {
    const prefix = `${database}.${collection}:`;
    const patterns = [];
    for (const [pattern, frequency] of this.queryPatterns) {
      if (!pattern.startsWith(prefix)) {
        continue;
      }
      const fieldList = pattern.substring(prefix.length);
      if (fieldList) {
        patterns.push({ fields: fieldList.split(','), frequency });
      }
    }
    return patterns.sort((a, b) => b.frequency - a.frequency);
  }

  /** Total number of tracked queries for one collection, empty filters included. */
  countTrackedQueries(database, collection) {
    const prefix = `${database}.${collection}:`;
    let total = 0;
    for (const [pattern, count] of this.queryPatterns) {
      if (pattern.startsWith(prefix)) {
        total += count;
      }
    }
    return total;
  }

  /**
   * Tracked fields of a collection with their hit counts, most used first.
   *
   * @returns {Array<{name: string, usage: number}>}
   */
  getFrequentlyUsedFields(database, collection) {
    const prefix = `${database}.${collection}.`;
    const fields = [];
    for (const [key, stats] of this.fieldUsage) {
      if (key.startsWith(prefix)) {
        fields.push({ name: key.substring(prefix.length), usage: stats.count });
      }
    }
    return fields.sort((a, b) => b.usage - a.usage);
  }

  /**
   * Candidate fields for a text index.
   * NOTE(review): this is a fixed list that ignores `database`/`collection`,
   * so a text index is recommended for every collection that lacks one.
   */
  getTextSearchFields(database, collection) {
    return ['title', 'content', 'description', 'text', 'name', 'summary'];
  }

  /**
   * Whether any tracked query of the collection filtered on the `_vector`
   * field (or one of its sub-paths). Only the field list of each pattern is
   * inspected, so database or collection names containing `_vector` do not
   * count.
   *
   * @returns {Promise<boolean>}
   */
  async hasVectorOperations(database, collection) {
    const prefix = `${database}.${collection}:`;
    for (const pattern of this.queryPatterns.keys()) {
      if (!pattern.startsWith(prefix)) {
        continue;
      }
      const fields = pattern.substring(prefix.length).split(',');
      if (fields.some((field) => field === '_vector' || field.startsWith('_vector.'))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Whether the collection already has an index with the same key map.
   *
   * @param {string} database
   * @param {string} collection
   * @param {{key: object}} indexSpec
   * @param {Array<object>} [knownIndexes] - Already fetched index list; when
   *   omitted the list is loaded with `listIndexes`.
   * @returns {Promise<boolean>}
   */
  async hasEquivalentIndex(database, collection, indexSpec, knownIndexes) {
    const indexes = knownIndexes ?? await this.listIndexes(database, collection);
    return indexes.some((index) => this.indexSpecsEqual(index.key, indexSpec.key));
  }

  /** Compare two key maps for the same fields with strictly equal values, ignoring key order. */
  indexSpecsEqual(key1, key2) {
    const keys1 = Object.keys(key1).sort();
    const keys2 = Object.keys(key2).sort();
    if (keys1.length !== keys2.length) {
      return false;
    }
    return keys1.every((name, i) => name === keys2[i] && key1[name] === key2[name]);
  }

  /**
   * Score how well an index fits a query: +10 per matching query field, +5 per
   * matching sort field, -1 per indexed field (narrower indexes win) and
   * +0.1 per recorded use.
   *
   * @returns {number} `0` when the index shares no field with the query or
   *   sort, so past popularity alone can never make an index "optimal".
   */
  calculateIndexScore(index, queryFields, sortFields) {
    const indexFields = new Set(Object.keys(index.key));
    const queryMatches = queryFields.filter((field) => indexFields.has(field)).length;
    const sortMatches = sortFields.filter((field) => indexFields.has(field)).length;
    if (queryMatches === 0 && sortMatches === 0) {
      return 0;
    }
    let score = queryMatches * 10 + sortMatches * 5;
    score -= indexFields.size;
    score += (index.stats?.usage || 0) * 0.1;
    return score;
  }
}
//# sourceMappingURL=index-manager.js.map