@a24z/markdown-search
Version:
High-performance full-text search for markdown documents
595 lines • 24.7 kB
JavaScript
"use strict";
/**
* SearchEngine - Main search engine implementation for markdown documents
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.SearchEngine = void 0;
const SearchEngineFactory_1 = require("./SearchEngineFactory");
const DocumentIndexer_1 = require("./DocumentIndexer");
// Debug logging utility - set to true to enable verbose logs
const DEBUG = false;
const debugLog = (...args) => {
if (DEBUG) {
debugLog(...args);
}
};
class SearchEngine {
indexer;
searchEngine;
storage;
markdownProvider;
indexKey;
constructor(config, indexKey = 'search-index') {
this.storage = config.storage;
this.markdownProvider = config.markdownProvider;
this.searchEngine = config.searchEngine || SearchEngineFactory_1.SearchEngineFactory.create('flexsearch');
this.indexer = new DocumentIndexer_1.DocumentIndexer();
this.indexKey = indexKey;
}
/**
* Initialize the search engine, loading any existing index
*/
async initialize() {
try {
// Initialize the search engine adapter
await this.searchEngine.initialize();
// Try to load existing index
const savedIndex = await this.storage.loadIndex(this.indexKey);
if (savedIndex) {
await this.searchEngine.importIndex(savedIndex);
}
}
catch (error) {
console.error('Failed to initialize search engine:', error);
// Continue with empty index
}
}
/**
* Index all markdown files in the workspace
*/
async indexFiles(options) {
debugLog('Indexing files...');
const startTime = Date.now();
debugLog('Start time:', startTime);
const errors = [];
debugLog('Errors:', errors);
let filesIndexed = 0;
debugLog('Files indexed:', filesIndexed);
let sectionsIndexed = 0;
debugLog('Sections indexed:', sectionsIndexed);
let documentsIndexed = 0;
debugLog('Documents indexed:', documentsIndexed);
try {
// Phase 1: Discovering files
debugLog('Phase 1: Discovering files');
if (options?.onProgress) {
options.onProgress({
phase: 'discovering',
filesProcessed: 0,
totalFiles: 0,
documentsIndexed: 0,
percentage: 0,
});
}
// Find all markdown files
const files = await this.markdownProvider.findMarkdownFiles(options?.fileOptions);
const totalFiles = files.length;
debugLog('Total files:', totalFiles);
// Report discovered files to UI
if (options?.onProgress) {
options.onProgress({
phase: 'discovering',
filesProcessed: 0,
totalFiles: totalFiles,
documentsIndexed: 0,
percentage: 5,
foundFiles: {
list: files.slice(0, Math.min(10, files.length)).map((f) => f.path), // Show first 10 files
total: totalFiles,
hasMore: totalFiles > 10,
},
});
}
// Start a new indexing session - this will ensure the first addDocuments call clears the index
debugLog('[SearchEngine] 🎯 About to start new indexing session...');
if (this.searchEngine.startNewIndexingSession) {
this.searchEngine.startNewIndexingSession();
debugLog('[SearchEngine] ✅ Successfully called startNewIndexingSession()');
}
else {
debugLog('[SearchEngine] ⚠️ WARNING: startNewIndexingSession method not available on search engine');
}
// Process files in smaller batches to avoid memory issues
// With large markdown files, even 10 files can consume too much memory
const batchSize = options?.batchSize || 3;
// Don't accumulate all documents - just track count for stats
let totalDocumentsIndexed = 0;
for (let i = 0; i < files.length; i += batchSize) {
const batch = files.slice(i, Math.min(i + batchSize, files.length));
debugLog('Batch:', batch);
const batchDocuments = [];
debugLog('Batch documents:', batchDocuments);
// Process each file in the batch
for (const file of batch) {
try {
// Phase 2: Parsing
if (options?.onProgress) {
options.onProgress({
phase: 'parsing',
currentFile: file.name,
filesProcessed: filesIndexed,
totalFiles: totalFiles,
documentsIndexed: documentsIndexed,
percentage: 10 + Math.round((filesIndexed / totalFiles) * 35), // 10-45%
});
}
debugLog('Reading file content...');
// Read file content
const content = await this.markdownProvider.readMarkdownFile(file.path);
debugLog('File content:', content);
// Parse and create search documents
const documents = await this.indexer.parseAndIndex(content, file, options);
debugLog('Documents:', documents);
// Count sections (documents of type 'section')
const sectionCount = documents.filter((doc) => doc.type === 'section').length;
sectionsIndexed += sectionCount;
documentsIndexed += documents.length;
debugLog('Documents indexed:', documentsIndexed);
batchDocuments.push(...documents);
filesIndexed++;
debugLog('Files indexed:', filesIndexed);
}
catch (error) {
errors.push({
file: file.path,
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
}
}
debugLog('Batch documents:', batchDocuments);
// Phase 3: Indexing
if (batchDocuments.length > 0) {
if (options?.onProgress) {
options.onProgress({
phase: 'indexing',
filesProcessed: filesIndexed,
totalFiles: totalFiles,
documentsIndexed: documentsIndexed,
percentage: 45 + Math.round((filesIndexed / totalFiles) * 35), // 45-80%
});
}
debugLog('Adding documents to search engine...');
debugLog(`[SearchEngine] About to add ${batchDocuments.length} documents to search engine (batch)`);
debugLog(`[SearchEngine] Total documents so far: ${totalDocumentsIndexed + batchDocuments.length}`);
await this.searchEngine.addDocuments(batchDocuments);
debugLog('Documents added to search engine...');
totalDocumentsIndexed += batchDocuments.length;
debugLog('Total documents indexed:', totalDocumentsIndexed);
// Clear batch documents to free memory
batchDocuments.length = 0;
}
}
debugLog('Total documents indexed:', totalDocumentsIndexed);
// Phase 4: Persisting
if (options?.onProgress) {
options.onProgress({
phase: 'persisting',
filesProcessed: filesIndexed,
totalFiles: totalFiles,
documentsIndexed: documentsIndexed,
percentage: 85,
});
}
debugLog('Exporting Index ...');
// Save the index
const indexData = await this.searchEngine.exportIndex();
const stats = {
totalFiles: filesIndexed,
totalSections: sectionsIndexed,
totalDocuments: documentsIndexed,
indexedAt: new Date().toISOString(),
};
debugLog('Saving index...');
await this.storage.saveIndex(this.indexKey, {
data: indexData,
metadata: {
version: '1.0.0',
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
stats,
},
});
debugLog('Index saved...');
// Complete
if (options?.onProgress) {
options.onProgress({
phase: 'persisting',
filesProcessed: filesIndexed,
totalFiles: totalFiles,
documentsIndexed: documentsIndexed,
percentage: 100,
});
}
debugLog('Index saved...');
return {
filesIndexed,
sectionsIndexed,
documentsIndexed,
errors: errors.length > 0 ? errors : undefined,
duration: Date.now() - startTime,
};
}
catch (error) {
throw new Error(`Indexing failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Search the index
*/
async search(query, options) {
debugLog('[SearchEngine] Search called with query:', query, 'options:', options);
if (!query || query.trim().length === 0) {
debugLog('[SearchEngine] Empty query, returning empty array');
return [];
}
// Pass options including filters to the search engine adapter
const results = await this.searchEngine.search(query.trim(), options);
return results;
}
/**
* Get index statistics
*/
async getStats() {
const metadata = await this.storage.getIndexMetadata(this.indexKey);
return metadata?.stats || null;
}
/**
* Check if index exists
*/
async hasIndex() {
return await this.storage.hasIndex(this.indexKey);
}
/**
* Clear the index
*/
async clearIndex() {
await this.searchEngine.clear();
await this.storage.deleteIndex(this.indexKey);
}
/**
* Update specific files in the index
*/
async updateFiles(filePaths, options) {
const startTime = Date.now();
const errors = [];
let filesIndexed = 0;
let sectionsIndexed = 0;
let documentsIndexed = 0;
try {
for (const filePath of filePaths) {
try {
// Get file info
const fileInfo = await this.markdownProvider.getFileInfo(filePath);
// Remove old documents for this file
const fileUri = fileInfo.uri || fileInfo.path;
const oldDocsToRemove = [];
// Use getAllDocuments if available, otherwise fallback to search
const allResults = this.searchEngine.getAllDocuments
? await this.searchEngine.getAllDocuments()
: await this.searchEngine.search(' ', { limit: 10000 });
allResults.forEach((result) => {
if (result.fileUri === fileUri || result.filePath === filePath) {
oldDocsToRemove.push(result.id);
}
});
if (oldDocsToRemove.length > 0) {
await this.searchEngine.removeDocuments(oldDocsToRemove);
}
// Read and index the file
const content = await this.markdownProvider.readMarkdownFile(filePath);
const documents = await this.indexer.parseAndIndex(content, fileInfo, options);
// Add new documents
if (documents.length > 0) {
await this.searchEngine.addDocuments(documents);
// Count sections
const sectionCount = documents.filter((doc) => doc.type === 'section').length;
sectionsIndexed += sectionCount;
documentsIndexed += documents.length;
}
filesIndexed++;
}
catch (error) {
errors.push({
file: filePath,
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
}
}
// Save updated index
const indexData = await this.searchEngine.exportIndex();
const existingMetadata = await this.storage.getIndexMetadata(this.indexKey);
await this.storage.saveIndex(this.indexKey, {
data: indexData,
metadata: {
version: existingMetadata?.version || '1.0.0',
createdAt: existingMetadata?.createdAt || new Date().toISOString(),
updatedAt: new Date().toISOString(),
stats: {
...existingMetadata?.stats,
indexedAt: new Date().toISOString(),
},
},
});
return {
filesIndexed,
sectionsIndexed,
documentsIndexed,
errors: errors.length > 0 ? errors : undefined,
duration: Date.now() - startTime,
};
}
catch (error) {
throw new Error(`Update failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Index a single document with custom metadata
*/
async indexDocument(filePath, metadata) {
try {
// Read file content
const fileInfo = await this.markdownProvider.getFileInfo(filePath);
const content = await this.markdownProvider.readMarkdownFile(filePath);
// Parse and create search documents
const documents = await this.indexer.parseAndIndex(content, fileInfo);
// Add custom metadata to each document
if (metadata) {
documents.forEach((doc) => {
doc.metadata = { ...doc.metadata, ...metadata };
});
}
// Add documents to search engine
await this.searchEngine.addDocuments(documents);
// Save updated index
await this.saveIndex();
}
catch (error) {
throw new Error(`Failed to index document: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Index multiple documents with metadata
*/
async indexDocumentsWithMetadata(items) {
const startTime = Date.now();
const errors = [];
let filesIndexed = 0;
let sectionsIndexed = 0;
let documentsIndexed = 0;
try {
for (const item of items) {
try {
// Read file content
const fileInfo = await this.markdownProvider.getFileInfo(item.path);
const content = await this.markdownProvider.readMarkdownFile(item.path);
// Parse and create search documents
const documents = await this.indexer.parseAndIndex(content, fileInfo);
// Add custom metadata to each document
if (item.metadata) {
documents.forEach((doc) => {
doc.metadata = { ...doc.metadata, ...item.metadata };
});
}
// Add documents to search engine
await this.searchEngine.addDocuments(documents);
// Count sections and documents
const sectionCount = documents.filter((doc) => doc.type === 'section').length;
sectionsIndexed += sectionCount;
documentsIndexed += documents.length;
filesIndexed++;
}
catch (error) {
errors.push({
file: item.path,
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
}
}
// Save updated index
await this.saveIndex();
return {
filesIndexed,
sectionsIndexed,
documentsIndexed,
errors: errors.length > 0 ? errors : undefined,
duration: Date.now() - startTime,
};
}
catch (error) {
throw new Error(`Failed to index documents: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Update an existing document
*/
async updateDocument(filePath, metadata) {
try {
// Remove old documents for this file
await this.removeDocument(filePath);
// Index the updated file
await this.indexDocument(filePath, metadata);
}
catch (error) {
throw new Error(`Failed to update document: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Remove a single document from the index
*/
async removeDocument(filePath) {
try {
// Get file info
const fileInfo = await this.markdownProvider.getFileInfo(filePath);
const fileUri = fileInfo.uri || fileInfo.path;
// Find all documents from this file
const docsToRemove = [];
// Use getAllDocuments if available, otherwise fallback to search
const allResults = this.searchEngine.getAllDocuments
? await this.searchEngine.getAllDocuments()
: await this.searchEngine.search(' ', { limit: 10000 });
allResults.forEach((result) => {
if (result.fileUri === fileUri || result.filePath === filePath) {
docsToRemove.push(result.id);
}
});
// Remove documents
if (docsToRemove.length > 0) {
await this.searchEngine.removeDocuments(docsToRemove);
await this.saveIndex();
}
}
catch (error) {
throw new Error(`Failed to remove document: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Remove all documents matching metadata criteria
*/
async removeDocumentsByMetadata(query) {
try {
const docsToRemove = [];
// Use getAllDocuments if available, otherwise fallback to search
const allResults = this.searchEngine.getAllDocuments
? await this.searchEngine.getAllDocuments()
: await this.searchEngine.search(' ', { limit: 10000 });
allResults.forEach((result) => {
if (result.metadata && this.matchesMetadata(result.metadata, query)) {
docsToRemove.push(result.id);
}
});
// Remove matching documents
if (docsToRemove.length > 0) {
await this.searchEngine.removeDocuments(docsToRemove);
await this.saveIndex();
}
return docsToRemove.length;
}
catch (error) {
throw new Error(`Failed to remove documents by metadata: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Check if document exists in index
*/
async hasDocument(filePath) {
try {
const fileInfo = await this.markdownProvider.getFileInfo(filePath);
const fileUri = fileInfo.uri || fileInfo.path;
// Use getAllDocuments if available, otherwise fallback to search
const results = this.searchEngine.getAllDocuments
? await this.searchEngine.getAllDocuments()
: await this.searchEngine.search(' ', { limit: 10000 });
return results.some((result) => result.fileUri === fileUri || result.filePath === filePath);
}
catch {
// If file doesn't exist or other error, return false
return false;
}
}
/**
* Helper function to check if metadata matches query
*/
matchesMetadata(metadata, query) {
for (const [key, value] of Object.entries(query)) {
if (metadata[key] !== value) {
return false;
}
}
return true;
}
/**
* Index documents directly (for non-file-based content like notes)
*/
async indexDocuments(documents, options) {
const startTime = Date.now();
const errors = [];
try {
// Phase 1: Clear if requested
if (options?.clearBefore) {
await this.clearIndex();
}
// Phase 2: Start new indexing session if adapter supports it
if (this.searchEngine.startNewIndexingSession) {
this.searchEngine.startNewIndexingSession();
}
// Phase 3: Add documents
if (options?.onProgress) {
options.onProgress({
phase: 'indexing',
filesProcessed: 0,
totalFiles: documents.length,
documentsIndexed: 0,
percentage: 50,
});
}
await this.searchEngine.addDocuments(documents);
// Phase 4: Save index
await this.saveIndex();
if (options?.onProgress) {
options.onProgress({
phase: 'complete',
filesProcessed: documents.length,
totalFiles: documents.length,
documentsIndexed: documents.length,
percentage: 100,
});
}
return {
filesIndexed: 0, // No files in document-based indexing
sectionsIndexed: documents.filter((d) => d.type === 'section').length,
documentsIndexed: documents.length,
errors: errors.length > 0 ? errors : undefined,
duration: Date.now() - startTime,
};
}
catch (error) {
throw new Error(`Document indexing failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Save the current index to storage
*/
async saveIndex() {
try {
const indexData = await this.searchEngine.exportIndex();
await this.storage.saveIndex(this.indexKey, {
data: indexData,
metadata: {
version: '1.0.0',
updatedAt: new Date().toISOString(),
stats: await this.getStats(),
},
});
}
catch (error) {
throw new Error(`Failed to save index: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Get direct access to the search engine adapter (use with caution)
*/
getSearchAdapter() {
return this.searchEngine;
}
/**
* Get direct access to the storage adapter (use with caution)
*/
getStorageAdapter() {
return this.storage;
}
}
exports.SearchEngine = SearchEngine;
//# sourceMappingURL=SearchEngine.js.map