ai-embed-search
Version:
Smart. Simple. Local. AI-powered semantic search in TypeScript using transformer embeddings. No cloud, no API keys — 100% offline.
137 lines (132 loc) • 3.91 kB
JavaScript
// src/utils/cosine.ts
/**
 * Cosine similarity between two equal-length numeric vectors:
 * (A · B) / (|A| * |B|). The 1e-8 term in the denominator guards
 * against division by zero when either vector is all zeros.
 */
function cosineSimilarity(vecA, vecB) {
  let dot = 0;
  let sqA = 0;
  let sqB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dot += vecA[i] * vecB[i];
    sqA += vecA[i] * vecA[i];
    sqB += vecB[i] * vecB[i];
  }
  return dot / (Math.sqrt(sqA) * Math.sqrt(sqB) + 1e-8);
}
// src/core/vectorStore.ts
import { writeFile, readFile } from "fs/promises";

// In-memory index of embedded items: { id, text, vector, meta }.
// Exported (as _vectorStore) and mutated by embed()/loadVectors().
var vectorStore = [];

/**
 * Persist the current vector store to disk as pretty-printed JSON.
 * @param filePath destination file path
 */
async function saveVectors(filePath) {
  await writeFile(filePath, JSON.stringify(vectorStore, null, 2), "utf-8");
}

/**
 * Replace the in-memory store with the contents of a JSON file.
 * Validates that the parsed payload is an array so a corrupt or
 * unrelated file cannot silently poison later searches.
 * @param filePath source file path
 * @throws {Error} if the file does not contain a JSON array
 */
async function loadVectors(filePath) {
  const raw = await readFile(filePath, "utf-8");
  const parsed = JSON.parse(raw);
  if (!Array.isArray(parsed)) {
    throw new Error(`ai-search: ${filePath} does not contain a vector array`);
  }
  vectorStore = parsed;
}

// Remove a single entry by id (no-op when the id is absent).
function removeVector(id) {
  vectorStore = vectorStore.filter((entry) => entry.id !== id);
}

// Drop every entry from the store.
function clearVectors() {
  vectorStore = [];
}
// src/core/cache.ts
// Query-result cache keyed by `${query}:${maxItems}` with absolute
// expiry timestamps (ms since epoch).
var cache = /* @__PURE__ */ new Map();

/**
 * Return the cached result for (query, maxItems), or null when absent
 * or expired. Expired entries are evicted on lookup so the map does
 * not grow without bound.
 */
function getCached(query, maxItems) {
  const key = `${query}:${maxItems}`;
  const cached = cache.get(key);
  if (!cached) return null;
  if (cached.expires <= Date.now()) {
    cache.delete(key); // lazy eviction of stale entries
    return null;
  }
  return cached.result;
}

/**
 * Store a result for (query, maxItems) that expires `seconds` from now.
 */
function setCached(query, maxItems, seconds, result) {
  const key = `${query}:${maxItems}`;
  cache.set(key, {
    expires: Date.now() + seconds * 1e3,
    result
  });
}
// src/core/engine.ts
var embedFn;
function initEmbedder(options) {
embedFn = options.embedder;
}
async function embed(items) {
if (!embedFn) throw new Error("ai-search: embedder not initialized");
const texts = items.map((item) => item.text);
let vectors;
try {
const batchResult = await embedFn(texts);
if (Array.isArray(batchResult) && Array.isArray(batchResult[0])) {
vectors = batchResult;
} else {
throw new Error("Embed function did not return batch output");
}
} catch {
vectors = [];
for (const text of texts) {
const vector = await embedFn(text);
vectors.push(vector);
}
}
for (let i = 0; i < items.length; i++) {
vectorStore.push({
id: items[i].id,
text: items[i].text,
vector: vectors[i],
meta: items[i].meta
});
}
}
// Fluent search builder: search(q).filter(fn).exec() or .cacheFor(s).
// NOTE(review): results are sliced to maxItems BEFORE the filter is
// applied, so a filter can yield fewer than maxItems even when more
// matches exist further down the ranking — confirm this is intended.
function search(query, maxItems = 5) {
// Predicate applied to the final result list; default keeps everything.
let filterFn = () => true;
const runSearch = async () => {
if (!embedFn) throw new Error("ai-search: embedder not initialized");
// Cache lookup is keyed only by (query, maxItems); the current filter
// is re-applied to the cached list. A list stored via cacheFor() was
// already filtered at cache time, so filters from different calls can
// compound — NOTE(review): verify this interplay is intentional.
const cached = getCached(query, maxItems);
if (cached) return cached.filter(filterFn);
// Embed the query once, then score every stored entry against it.
const queryVec = await embedFn(query);
const results = vectorStore.map((entry) => ({
id: entry.id,
text: entry.text,
score: cosineSimilarity(entry.vector, queryVec),
meta: entry.meta
}));
// Rank by descending similarity, truncate, then apply the filter.
return results.sort((a, b) => b.score - a.score).slice(0, maxItems).filter(filterFn);
};
return {
// Set the post-ranking predicate; chainable.
filter(fn) {
filterFn = fn;
return this;
},
// Execute the search and return the ranked, filtered results.
exec: async () => await runSearch(),
// Execute, then cache the (already filtered) result for `seconds`.
cacheFor: async (seconds) => {
const result = await runSearch();
setCached(query, maxItems, seconds, result);
return result;
}
};
}
async function getSimilarItems(id, maxItems = 5) {
const target = vectorStore.find((item) => item.id === id);
if (!target) throw new Error(`Item with id ${id} not found`);
const results = vectorStore.filter((entry) => entry.id !== id).map((entry) => ({
id: entry.id,
text: entry.text,
score: cosineSimilarity(entry.vector, target.vector),
meta: entry.meta
}));
return results.sort((a, b) => b.score - a.score).slice(0, maxItems);
}
// src/core/embedder.ts
import { pipeline } from "@xenova/transformers/src/transformers.js";

/**
 * Build the default local embedder: a mean-pooled, normalized
 * feature-extraction pipeline over the Xenova/all-MiniLM-L6-v2 model.
 * @returns an async function mapping a text to its embedding (number[])
 */
async function createEmbedder() {
  const extractor = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  return async (text) => {
    const output = await extractor(text, {
      pooling: "mean",
      normalize: true
    });
    return Array.from(output.data);
  };
}
// Public API of the bundle. The leading underscore on _vectorStore
// marks the raw in-memory store as internal (exposed for tests/debugging).
export {
vectorStore as _vectorStore,
clearVectors,
createEmbedder,
embed,
getSimilarItems,
initEmbedder,
loadVectors,
removeVector,
saveVectors,
search
};