
ai-embed-search


Smart. Simple. Local. AI-powered semantic search in TypeScript using transformer embeddings. No cloud, no API keys — 100% offline.

137 lines (132 loc) 3.91 kB
// src/utils/cosine.ts
function cosineSimilarity(vecA, vecB) {
  const dot = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
  const normA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
  const normB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
  return dot / (normA * normB + 1e-8);
}

// src/core/vectorStore.ts
import { writeFile, readFile } from "fs/promises";
var vectorStore = [];
async function saveVectors(filePath) {
  await writeFile(filePath, JSON.stringify(vectorStore, null, 2), "utf-8");
}
async function loadVectors(filePath) {
  const raw = await readFile(filePath, "utf-8");
  vectorStore = JSON.parse(raw);
}
function removeVector(id) {
  vectorStore = vectorStore.filter((entry) => entry.id !== id);
}
function clearVectors() {
  vectorStore = [];
}

// src/core/cache.ts
var cache = /* @__PURE__ */ new Map();
function getCached(query, maxItems) {
  const key = `${query}:${maxItems}`;
  const cached = cache.get(key);
  return cached && cached.expires > Date.now() ? cached.result : null;
}
function setCached(query, maxItems, seconds, result) {
  const key = `${query}:${maxItems}`;
  cache.set(key, { expires: Date.now() + seconds * 1e3, result });
}

// src/core/engine.ts
var embedFn;
function initEmbedder(options) {
  embedFn = options.embedder;
}
async function embed(items) {
  if (!embedFn) throw new Error("ai-search: embedder not initialized");
  const texts = items.map((item) => item.text);
  let vectors;
  try {
    const batchResult = await embedFn(texts);
    if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
      vectors = batchResult;
    } else {
      throw new Error("Embed function did not return batch output");
    }
  } catch {
    vectors = [];
    for (const text of texts) {
      const vector = await embedFn(text);
      vectors.push(vector);
    }
  }
  for (let i = 0; i < items.length; i++) {
    vectorStore.push({
      id: items[i].id,
      text: items[i].text,
      vector: vectors[i],
      meta: items[i].meta
    });
  }
}
function search(query, maxItems = 5) {
  let filterFn = () => true;
  const runSearch = async () => {
    if (!embedFn) throw new Error("ai-search: embedder not initialized");
    const cached = getCached(query, maxItems);
    if (cached) return cached.filter(filterFn);
    const queryVec = await embedFn(query);
    const results = vectorStore.map((entry) => ({
      id: entry.id,
      text: entry.text,
      score: cosineSimilarity(entry.vector, queryVec),
      meta: entry.meta
    }));
    return results.sort((a, b) => b.score - a.score).slice(0, maxItems).filter(filterFn);
  };
  return {
    filter(fn) {
      filterFn = fn;
      return this;
    },
    exec: async () => await runSearch(),
    cacheFor: async (seconds) => {
      const result = await runSearch();
      setCached(query, maxItems, seconds, result);
      return result;
    }
  };
}
async function getSimilarItems(id, maxItems = 5) {
  const target = vectorStore.find((item) => item.id === id);
  if (!target) throw new Error(`Item with id ${id} not found`);
  const results = vectorStore.filter((entry) => entry.id !== id).map((entry) => ({
    id: entry.id,
    text: entry.text,
    score: cosineSimilarity(entry.vector, target.vector),
    meta: entry.meta
  }));
  return results.sort((a, b) => b.score - a.score).slice(0, maxItems);
}

// src/core/embedder.ts
import { pipeline } from "@xenova/transformers/src/transformers.js";
async function createEmbedder() {
  const embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  return async (text) => {
    const result = await embedder(text, { pooling: "mean", normalize: true });
    return Array.from(result.data);
  };
}

export {
  vectorStore as _vectorStore,
  clearVectors,
  createEmbedder,
  embed,
  getSimilarItems,
  initEmbedder,
  loadVectors,
  removeVector,
  saveVectors,
  search
};
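
For context, here is a minimal usage sketch assembled from the exports above. It is illustrative, not official documentation: the "ai-embed-search" import specifier and the sample documents are assumptions, while the function names and call shapes are taken from the bundled source.

// Usage sketch (illustrative): import specifier and sample data are assumptions.
import {
  createEmbedder,
  initEmbedder,
  embed,
  search,
  getSimilarItems,
  saveVectors
} from "ai-embed-search";

// createEmbedder() loads Xenova/all-MiniLM-L6-v2 through @xenova/transformers
// and returns a text -> number[] function; initEmbedder() plugs it into the engine.
const embedder = await createEmbedder();
initEmbedder({ embedder });

// Index a few documents: each item needs an id and text; meta is optional.
await embed([
  { id: "1", text: "How do I reset my password?", meta: { topic: "auth" } },
  { id: "2", text: "Shipping usually takes 3 to 5 business days." },
  { id: "3", text: "You can cancel a subscription from the billing page." }
]);

// search() returns a chainable query: filter() narrows results,
// exec() runs the search, cacheFor(seconds) runs it and caches the result.
const hits = await search("forgot my login", 3)
  .filter((r) => r.score > 0.3)
  .exec();
console.log(hits); // [{ id, text, score, meta }, ...] sorted by cosine similarity

// Related items for an already-indexed entry, plus optional persistence to disk.
const related = await getSimilarItems("1", 2);
await saveVectors("./vectors.json");

Note that cacheFor(seconds) both executes the query and stores the result under a `query:maxItems` key, so an identical search repeated within the window returns the cached results without re-embedding the query.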