ai-functions
Version:
Core AI primitives for building intelligent applications
170 lines • 5.26 kB
JavaScript
/**
* Embedding utilities from AI SDK
*
* Re-exports embed, embedMany, and cosineSimilarity from the Vercel AI SDK
* with additional convenience wrappers.
*
* Default model: Cloudflare Workers AI @cf/baai/bge-m3
*
* @packageDocumentation
*/
// Re-export core embedding functions from AI SDK
export { embed, embedMany, cosineSimilarity } from 'ai';
// Re-export Cloudflare provider from ai-providers
export { cloudflare, cloudflareEmbedding, DEFAULT_CF_EMBEDDING_MODEL } from 'ai-providers/cloudflare';
import { cosineSimilarity, embed as aiEmbed, embedMany as aiEmbedMany } from 'ai';
import { cloudflareEmbedding, DEFAULT_CF_EMBEDDING_MODEL } from 'ai-providers/cloudflare';
/**
 * Build the embedding model that the convenience wrappers in this module
 * fall back to (per the file header: Cloudflare @cf/baai/bge-m3).
 *
 * @returns The value produced by `cloudflareEmbedding(DEFAULT_CF_EMBEDDING_MODEL)`.
 */
export function getDefaultEmbeddingModel() {
    const defaultModel = cloudflareEmbedding(DEFAULT_CF_EMBEDDING_MODEL);
    return defaultModel;
}
/**
 * Embed one value with the default embedding model.
 *
 * Delegates to the AI SDK `embed` function, passing the model returned by
 * `getDefaultEmbeddingModel()` together with `value`.
 *
 * @param value - The value to embed.
 * @returns A promise for whatever the AI SDK `embed` call resolves to.
 *
 * @example
 * ```ts
 * import { embedText } from 'ai-functions'
 *
 * const { embedding } = await embedText('hello world')
 * ```
 */
export async function embedText(value) {
    const model = getDefaultEmbeddingModel();
    const request = { model, value };
    return aiEmbed(request);
}
/**
 * Embed several values in one call with the default embedding model.
 *
 * Delegates to the AI SDK `embedMany` function, passing the model returned
 * by `getDefaultEmbeddingModel()` together with `values`.
 *
 * @param values - The values to embed.
 * @returns A promise for whatever the AI SDK `embedMany` call resolves to.
 *
 * @example
 * ```ts
 * import { embedTexts } from 'ai-functions'
 *
 * const { embeddings } = await embedTexts(['doc1', 'doc2', 'doc3'])
 * ```
 */
export async function embedTexts(values) {
    const model = getDefaultEmbeddingModel();
    const request = { model, values };
    return aiEmbedMany(request);
}
/**
 * Find the most similar items to a query embedding.
 *
 * Every entry of `embeddings` is scored against `queryEmbedding` with cosine
 * similarity. Entries scoring below `minScore` are dropped, the remainder is
 * sorted by descending score, and at most `topK` results are returned.
 *
 * @param queryEmbedding - Embedding to compare every candidate against.
 * @param embeddings - Candidate embeddings.
 * @param items - Items paired with `embeddings` by position
 *   (`items[i]` is reported for `embeddings[i]`).
 * @param [options]
 * @param [options.topK=10] - Maximum number of results to return.
 * @param [options.minScore=0] - Minimum score a candidate must reach. With the
 *   default of 0, candidates with negative similarity are excluded; pass a
 *   lower value (e.g. -1) to keep them.
 * @returns Array of `{ item, score, index }` objects, best match first, where
 *   `index` is the candidate's position in `embeddings`.
 *
 * @example
 * ```ts
 * import { embed, embedMany, findSimilar } from 'ai-functions'
 *
 * const documents = ['doc1', 'doc2', 'doc3']
 * const { embeddings } = await embedMany({ model, values: documents })
 * const { embedding: queryEmbedding } = await embed({ model, value: 'search query' })
 *
 * const results = findSimilar(queryEmbedding, embeddings, documents, { topK: 2 })
 * // [{ item: 'doc1', score: 0.95, index: 0 }, { item: 'doc2', score: 0.82, index: 1 }]
 * ```
 */
export function findSimilar(queryEmbedding, embeddings, items, options = {}) {
    const { topK = 10, minScore = 0 } = options;
    // `cosineSimilarity` is the module-level import from 'ai'. A call-time
    // `require('ai')` is not available inside an ES module, and it could not
    // make the dependency optional anyway because this file already imports
    // from 'ai' statically.
    return embeddings
        .map((embedding, index) => ({
            item: items[index],
            score: cosineSimilarity(queryEmbedding, embedding),
            index,
        }))
        .filter(({ score }) => score >= minScore)
        .sort((a, b) => b.score - a.score)
        .slice(0, topK);
}
/**
 * Calculate pairwise cosine similarities between all embeddings.
 *
 * Only the upper triangle is computed; each value is mirrored into the lower
 * triangle, and the diagonal is set to 1 without calling the similarity
 * function.
 *
 * @param embeddings - Embeddings to compare with one another.
 * @returns An n-by-n array of arrays (n = `embeddings.length`) where
 *   `matrix[i][j]` is the similarity between `embeddings[i]` and
 *   `embeddings[j]`. Empty input yields an empty array.
 *
 * @example
 * ```ts
 * const matrix = pairwiseSimilarity(embeddings)
 * // matrix[i][j] = similarity between embeddings[i] and embeddings[j]
 * ```
 */
export function pairwiseSimilarity(embeddings) {
    // `cosineSimilarity` is the module-level import from 'ai'; `require` is
    // not available inside an ES module.
    const n = embeddings.length;
    // Each row must be its own array so writes to one row do not alias another.
    const matrix = Array.from({ length: n }, () => new Array(n).fill(0));
    for (let i = 0; i < n; i++) {
        matrix[i][i] = 1; // Self-similarity is always 1
        for (let j = i + 1; j < n; j++) {
            const sim = cosineSimilarity(embeddings[i], embeddings[j]);
            matrix[i][j] = sim;
            matrix[j][i] = sim;
        }
    }
    return matrix;
}
/**
 * Cluster embeddings by similarity using a simple threshold-based approach.
 *
 * Walks the embeddings in order. The first unassigned embedding seeds a new
 * cluster, and every later unassigned embedding whose cosine similarity to
 * that seed is at least `threshold` joins it. Candidates are compared against
 * the seed only, not against the other members of the cluster.
 *
 * @param embeddings - Embeddings to cluster.
 * @param items - Items paired with `embeddings` by position; the clusters
 *   contain these items rather than the embeddings.
 * @param [options]
 * @param [options.threshold=0.8] - Minimum similarity to the seed required to
 *   join its cluster.
 * @returns Array of clusters, each an array of items. Every index appears in
 *   exactly one cluster. Empty input yields an empty array.
 *
 * @example
 * ```ts
 * const clusters = clusterBySimilarity(embeddings, items, { threshold: 0.8 })
 * // [[item1, item2], [item3], [item4, item5, item6]]
 * ```
 */
export function clusterBySimilarity(embeddings, items, options = {}) {
    const { threshold = 0.8 } = options;
    // `cosineSimilarity` is the module-level import from 'ai'; `require` is
    // not available inside an ES module.
    const n = embeddings.length;
    const assigned = new Set();
    const clusters = [];
    for (let seed = 0; seed < n; seed++) {
        if (assigned.has(seed)) {
            continue;
        }
        const cluster = [items[seed]];
        assigned.add(seed);
        for (let candidate = seed + 1; candidate < n; candidate++) {
            if (assigned.has(candidate)) {
                continue;
            }
            const sim = cosineSimilarity(embeddings[seed], embeddings[candidate]);
            if (sim >= threshold) {
                cluster.push(items[candidate]);
                assigned.add(candidate);
            }
        }
        clusters.push(cluster);
    }
    return clusters;
}
/**
 * Compute the element-wise mean of a list of embeddings.
 * Handy for deriving a document embedding from its chunk embeddings.
 *
 * The output length is taken from the first embedding.
 *
 * @param embeddings - Embeddings to average.
 * @returns A new array holding the per-dimension mean, or `[]` when
 *   `embeddings` is empty.
 */
export function averageEmbeddings(embeddings) {
    const count = embeddings.length;
    if (count === 0) {
        return [];
    }
    const width = embeddings[0].length;
    return Array.from({ length: width }, (_, d) => {
        // Accumulate in input order, then divide once.
        let total = 0;
        for (const vector of embeddings) {
            total += vector[d];
        }
        return total / count;
    });
}
/**
 * Scale an embedding so that its Euclidean length is 1.
 *
 * @param embedding - The embedding to normalize.
 * @returns A new embedding with each component divided by the magnitude. When
 *   the magnitude is 0 (including empty input), the input is returned as-is,
 *   without copying.
 */
export function normalizeEmbedding(embedding) {
    let sumOfSquares = 0;
    for (const component of embedding) {
        sumOfSquares += component * component;
    }
    const length = Math.sqrt(sumOfSquares);
    // Guard against dividing by zero; hand back the caller's own array.
    return length === 0 ? embedding : embedding.map((component) => component / length);
}
//# sourceMappingURL=embeddings.js.map