UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

285 lines (229 loc) 7.4 kB
/** * Tests for embedding utilities * * Pure unit tests for utility functions that don't require AI API calls. * Gateway-dependent tests are skipped if no gateway is configured. */ import { describe, it, expect } from 'vitest' import { cosineSimilarity, findSimilar, pairwiseSimilarity, clusterBySimilarity, averageEmbeddings, normalizeEmbedding, } from '../src/index.js' describe('cosineSimilarity', () => { it('returns 1 for identical vectors', () => { const a = [1, 0, 0] const b = [1, 0, 0] expect(cosineSimilarity(a, b)).toBeCloseTo(1) }) it('returns 0 for orthogonal vectors', () => { const a = [1, 0, 0] const b = [0, 1, 0] expect(cosineSimilarity(a, b)).toBeCloseTo(0) }) it('returns -1 for opposite vectors', () => { const a = [1, 0, 0] const b = [-1, 0, 0] expect(cosineSimilarity(a, b)).toBeCloseTo(-1) }) it('handles non-normalized vectors', () => { const a = [2, 0, 0] const b = [5, 0, 0] expect(cosineSimilarity(a, b)).toBeCloseTo(1) }) it('works with higher dimensional vectors', () => { const a = [1, 2, 3, 4, 5] const b = [1, 2, 3, 4, 5] expect(cosineSimilarity(a, b)).toBeCloseTo(1) }) }) describe('findSimilar', () => { const embeddings = [ [1, 0, 0], [0.9, 0.1, 0], [0, 1, 0], [0, 0, 1], [-1, 0, 0], ] const items = ['A', 'B', 'C', 'D', 'E'] it('finds most similar items', () => { const query = [1, 0, 0] const results = findSimilar(query, embeddings, items, { topK: 3 }) expect(results).toHaveLength(3) expect(results[0].item).toBe('A') // Exact match expect(results[0].score).toBeCloseTo(1) expect(results[1].item).toBe('B') // Very similar }) it('respects topK parameter', () => { const query = [1, 0, 0] const results = findSimilar(query, embeddings, items, { topK: 2 }) expect(results).toHaveLength(2) }) it('filters by minScore', () => { const query = [1, 0, 0] const results = findSimilar(query, embeddings, items, { minScore: 0.5 }) // Only A and B should have score >= 0.5 expect(results.every(r => r.score >= 0.5)).toBe(true) expect(results).toHaveLength(2) }) it('returns index in results', () => { const query = [0, 1, 0] const results = findSimilar(query, embeddings, items, { topK: 1 }) expect(results[0].item).toBe('C') expect(results[0].index).toBe(2) }) it('handles empty embeddings', () => { const query = [1, 0, 0] const results = findSimilar(query, [], [], { topK: 5 }) expect(results).toHaveLength(0) }) }) describe('pairwiseSimilarity', () => { it('creates a symmetric matrix', () => { const embeddings = [ [1, 0, 0], [0, 1, 0], [0, 0, 1], ] const matrix = pairwiseSimilarity(embeddings) expect(matrix).toHaveLength(3) expect(matrix[0]).toHaveLength(3) // Check symmetry for (let i = 0; i < 3; i++) { for (let j = 0; j < 3; j++) { expect(matrix[i][j]).toBeCloseTo(matrix[j][i]) } } }) it('has 1s on the diagonal', () => { const embeddings = [ [1, 2, 3], [4, 5, 6], [7, 8, 9], ] const matrix = pairwiseSimilarity(embeddings) for (let i = 0; i < 3; i++) { expect(matrix[i][i]).toBeCloseTo(1) } }) it('correctly computes similarity for orthogonal vectors', () => { const embeddings = [ [1, 0, 0], [0, 1, 0], ] const matrix = pairwiseSimilarity(embeddings) expect(matrix[0][1]).toBeCloseTo(0) expect(matrix[1][0]).toBeCloseTo(0) }) }) describe('clusterBySimilarity', () => { it('groups similar items together', () => { const embeddings = [ [1, 0, 0], [0.95, 0.05, 0], [0, 1, 0], [0.05, 0.95, 0], ] const items = ['A', 'A-like', 'B', 'B-like'] const clusters = clusterBySimilarity(embeddings, items, { threshold: 0.9 }) // Should create 2 clusters expect(clusters).toHaveLength(2) // Find the cluster with A const clusterA = clusters.find(c => c.includes('A')) expect(clusterA).toContain('A-like') // Find the cluster with B const clusterB = clusters.find(c => c.includes('B')) expect(clusterB).toContain('B-like') }) it('creates single-item clusters for dissimilar items', () => { const embeddings = [ [1, 0, 0], [0, 1, 0], [0, 0, 1], ] const items = ['X', 'Y', 'Z'] const clusters = clusterBySimilarity(embeddings, items, { threshold: 0.9 }) // Each item should be in its own cluster expect(clusters).toHaveLength(3) expect(clusters.every(c => c.length === 1)).toBe(true) }) it('puts all items in one cluster at low threshold', () => { const embeddings = [ [1, 0.1, 0.1], [0.9, 0.2, 0.1], [0.8, 0.3, 0.1], ] const items = ['A', 'B', 'C'] const clusters = clusterBySimilarity(embeddings, items, { threshold: 0.5 }) // All items similar enough to be in one cluster expect(clusters).toHaveLength(1) expect(clusters[0]).toHaveLength(3) }) }) describe('averageEmbeddings', () => { it('averages multiple embeddings', () => { const embeddings = [ [1, 0, 0], [0, 1, 0], [0, 0, 1], ] const avg = averageEmbeddings(embeddings) expect(avg).toHaveLength(3) expect(avg[0]).toBeCloseTo(1 / 3) expect(avg[1]).toBeCloseTo(1 / 3) expect(avg[2]).toBeCloseTo(1 / 3) }) it('returns empty array for empty input', () => { const avg = averageEmbeddings([]) expect(avg).toEqual([]) }) it('returns same vector for single input', () => { const embedding = [1, 2, 3] const avg = averageEmbeddings([embedding]) expect(avg).toEqual([1, 2, 3]) }) }) describe('normalizeEmbedding', () => { it('normalizes a vector to unit length', () => { const embedding = [3, 4, 0] // length = 5 const normalized = normalizeEmbedding(embedding) expect(normalized[0]).toBeCloseTo(0.6) expect(normalized[1]).toBeCloseTo(0.8) expect(normalized[2]).toBeCloseTo(0) // Check magnitude is 1 const magnitude = Math.sqrt( normalized.reduce((sum, val) => sum + val * val, 0) ) expect(magnitude).toBeCloseTo(1) }) it('handles already normalized vectors', () => { const embedding = [1, 0, 0] const normalized = normalizeEmbedding(embedding) expect(normalized).toEqual([1, 0, 0]) }) it('handles zero vector', () => { const embedding = [0, 0, 0] const normalized = normalizeEmbedding(embedding) expect(normalized).toEqual([0, 0, 0]) }) }) // Skip API-dependent tests if no gateway const hasGateway = !!process.env.AI_GATEWAY_URL || !!process.env.CF_ACCOUNT_ID describe.skipIf(!hasGateway)('embedText and embedTexts', () => { // These tests would require actual API calls // They're here as placeholders for when the gateway is available it.skip('embeds a single text', async () => { // const { embedText } = await import('../src/index.js') // const { embedding } = await embedText('hello world') // expect(Array.isArray(embedding)).toBe(true) // expect(embedding.length).toBeGreaterThan(0) }) it.skip('embeds multiple texts', async () => { // const { embedTexts } = await import('../src/index.js') // const { embeddings } = await embedTexts(['doc1', 'doc2', 'doc3']) // expect(embeddings).toHaveLength(3) }) })