@hpbyte/h-codex-core
Version:
Core indexing and search functionality for h-codex
71 lines • 2.75 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.semanticSearch = exports.SemanticSearch = void 0;
const embedder_1 = require("../ingestion/embedder");
const chunk_embeddings_repository_1 = require("../storage/chunk-embeddings.repository");
/**
 * Embedding-based chunk search with a simple lexical re-scoring pass.
 *
 * A query is normalized, embedded through `embedder.generateEmbeddings`, and
 * handed to `chunkEmbeddingsRepository.findSimilarChunksWithContext`. Each hit
 * is then annotated with a `relevanceScore` that blends the repository's
 * similarity with the fraction of query terms found in the chunk text.
 */
class SemanticSearch {
    /**
     * Run a search and return the annotated hits.
     *
     * @param {string} query - Raw user query.
     * @param {object} [options={}] - Passed through unchanged to
     *   `findSimilarChunksWithContext`.
     * @returns {Promise<Array<object>>} One entry per repository hit with
     *   `chunk`, `similarity`, `context`, `rank` and `relevanceScore`.
     *   `rank` is the 1-based position in the repository's result order; the
     *   list is not re-sorted by `relevanceScore`.
     * @throws {Error} When the query embedding cannot be produced.
     */
    async search(query, options = {}) {
        const queryEmbeddings = await this.getQueryEmbeddings(query);
        const contextResults = await chunk_embeddings_repository_1.chunkEmbeddingsRepository.findSimilarChunksWithContext(queryEmbeddings, options);
        const results = contextResults.map((result, index) => ({
            chunk: result.chunk,
            similarity: result.similarity,
            context: result.context,
            rank: index + 1,
        }));
        return this.calculateRelevanceScores(results, query);
    }

    /**
     * Normalize the query and turn it into a single embedding.
     *
     * @param {string} query - Raw user query.
     * @returns {Promise<*>} The first element returned by
     *   `embedder.generateEmbeddings` for the normalized query.
     * @throws {Error} "Failed to prepare search query: ..." when the embedder
     *   throws or returns nothing usable. The underlying error is attached as
     *   `cause` so its stack trace is not lost.
     */
    async getQueryEmbeddings(query) {
        const preprocessed = this.sanitizeQuery(query);
        try {
            const embedding = await embedder_1.embedder.generateEmbeddings([preprocessed]);
            if (!embedding || embedding.length === 0 || !embedding[0]) {
                throw new Error('Failed to generate embedding for query');
            }
            return embedding[0];
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            throw new Error(`Failed to prepare search query: ${reason}`, { cause: error });
        }
    }

    /**
     * Lower-case the query, replace punctuation/symbols with spaces and
     * collapse runs of whitespace.
     *
     * Letters, combining marks and digits of every script are kept (plus `_`),
     * so non-ASCII queries such as "café" or CJK text are not erased. For
     * pure-ASCII input this is the same as stripping `[^\w\s]`.
     *
     * @param {string} query - Raw user query.
     * @returns {string} Normalized query; may be empty.
     */
    sanitizeQuery(query) {
        return query
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Attach a blended `relevanceScore` to every result.
     *
     * @param {Array<object>} results - Items with `similarity` and
     *   `chunk.content`.
     * @param {string} query - Raw (unsanitized) query used for term matching.
     * @returns {Array<object>} New objects: each input's fields plus
     *   `relevanceScore`. Order is unchanged.
     */
    calculateRelevanceScores(results, query) {
        // Note: the two weights sum to 0.9, not 1.
        const semanticWeight = 0.6;
        const textWeight = 0.3;
        return results.map(result => {
            const semanticScore = result.similarity;
            const textScore = this.calculateTextMatchScore(result.chunk.content, query);
            const relevanceScore = semanticScore * semanticWeight + textScore * textWeight;
            return {
                ...result,
                relevanceScore,
            };
        });
    }

    /**
     * Fraction of whitespace-separated query terms that occur, case
     * insensitively, as substrings of `content`.
     *
     * Terms are taken from the query as given, so punctuation attached to a
     * word is part of the term. Repeated terms are counted once per
     * occurrence in the query.
     *
     * @param {string} content - Chunk text.
     * @param {string} query - Query text.
     * @returns {number} Value in [0, 1]; 0 when either input is empty or the
     *   query has no terms.
     */
    calculateTextMatchScore(content, query) {
        if (!content || !query) {
            return 0;
        }
        const queryTerms = query
            .toLowerCase()
            .split(/\s+/)
            .filter(term => term.length > 0);
        if (queryTerms.length === 0) {
            return 0;
        }
        const contentLower = content.toLowerCase();
        // Count first and divide once: summing 1/n per hit accumulates
        // rounding error (ten hits of 0.1 give 0.9999999999999999, not 1).
        const matched = queryTerms.filter(term => contentLower.includes(term)).length;
        return matched / queryTerms.length;
    }
}
// Export the class itself so callers can construct their own instances.
exports.SemanticSearch = SemanticSearch;
// Also export one ready-made instance, created when this module is first loaded.
exports.semanticSearch = new SemanticSearch();
//# sourceMappingURL=index.js.map