// seekmix
// Version:
// 🔍 A local semantic caching library for Node.js.
// 278 lines (242 loc) • 13.2 kB
// JavaScript
const { describe, it, before, after } = require('mocha');
const { strict: assert } = require('assert');
const { SeekMix } = require('../index');
describe('SeekMix Multilanguage Tests', function () {
// Increase timeout for tests since language processing can take time
this.timeout(60000);
// Initialize the cache with Huggingface provider which supports multilingual embeddings
let cache;
before(async function () {
  // Suite-wide setup: build the shared cache instance and open its connection
  // once, before any test runs.
  //
  // No embedding provider is passed to the constructor, so whatever SeekMix
  // uses by default applies.
  // NOTE(review): confirm that default is a multilingual embedding model.
  const cacheOptions = {
    ttl: 3600, // 1 hour TTL
    dropIndex: true, // Fresh index for testing
    dropKeys: true // Clear any existing keys
  };
  cache = new SeekMix(cacheOptions);

  // Every test below reads and writes through this one connection.
  await cache.connect();
  console.log('Connected to SeekMix cache for testing');
});
after(async function () {
  // Suite-wide teardown: close the connection opened in the `before` hook.
  //
  // Mocha still runs `after` hooks when a `before` hook fails. If setup threw
  // before `cache` was assigned there is nothing to disconnect, and calling a
  // method on `undefined` would only add an unrelated TypeError to the report.
  if (!cache) {
    return;
  }
  await cache.disconnect();
  console.log('Disconnected from SeekMix cache');
});
// Test multilingual query and retrieval
describe('Multilingual semantic caching', function () {
// Fixture table: one row per language, 20 rows in total. Columns, in order:
//   language  - label used in log output and assertion messages
//   original  - the query that gets stored in the cache
//   similar   - a paraphrase of `original`; expected to be a cache hit
//   different - an unrelated (weather) query; expected to be a cache miss
// The rows are expanded into { language, original, similar, different } objects.
const languagePairs = [
  ['English',
    'What are the best restaurants in Madrid',
    'Recommend me places to eat in Madrid',
    'How is the weather in Barcelona'],
  ['Spanish',
    'Cuáles son los mejores restaurantes de Madrid',
    'Recomiéndame lugares para comer en Madrid',
    'Cómo está el clima en Barcelona'],
  ['French',
    'Quels sont les meilleurs restaurants de Madrid',
    'Recommandez-moi des endroits pour manger à Madrid',
    'Comment est la météo à Barcelone'],
  ['German',
    'Was sind die besten Restaurants in Madrid',
    'Empfehlen Sie mir Orte zum Essen in Madrid',
    'Wie ist das Wetter in Barcelona'],
  ['Italian',
    'Quali sono i migliori ristoranti di Madrid',
    'Consigliami dove mangiare a Madrid',
    'Com\'è il tempo a Barcellona'],
  ['Portuguese',
    'Quais são os melhores restaurantes em Madrid',
    'Recomende-me lugares para comer em Madrid',
    'Como está o clima em Barcelona'],
  ['Dutch',
    'Wat zijn de beste restaurants in Madrid',
    'Raad me plekken aan om te eten in Madrid',
    'Hoe is het weer in Barcelona'],
  ['Polish',
    'Jakie są najlepsze restauracje w Madrycie',
    'Poleć mi miejsca do jedzenia w Madrycie',
    'Jaka jest pogoda w Barcelonie'],
  ['Swedish',
    'Vilka är de bästa restaurangerna i Madrid',
    'Rekommendera ställen att äta på i Madrid',
    'Hur är vädret i Barcelona'],
  ['Finnish',
    'Mitkä ovat parhaat ravintolat Madridissa',
    'Suosittele paikkoja syödä Madridissa',
    'Millainen sää on Barcelonassa'],
  ['Danish',
    'Hvad er de bedste restauranter i Madrid',
    'Anbefal mig steder at spise i Madrid',
    'Hvordan er vejret i Barcelona'],
  ['Norwegian',
    'Hva er de beste restaurantene i Madrid',
    'Anbefal meg steder å spise i Madrid',
    'Hvordan er været i Barcelona'],
  ['Czech',
    'Jaké jsou nejlepší restaurace v Madridu',
    'Doporučte mi místa k jídlu v Madridu',
    'Jaké je počasí v Barceloně'],
  ['Hungarian',
    'Melyek a legjobb éttermek Madridban',
    'Ajánlj nekem helyeket ahol ehetek Madridban',
    'Milyen az időjárás Barcelonában'],
  ['Greek',
    'Ποια είναι τα καλύτερα εστιατόρια στη Μαδρίτη',
    'Προτείνετέ μου μέρη για φαγητό στη Μαδρίτη',
    'Πώς είναι ο καιρός στη Βαρκελώνη'],
  ['Turkish',
    'Madrid\'deki en iyi restoranlar hangileridir',
    'Madrid\'de yemek yiyebileceğim yerler öner',
    'Barselona\'da hava nasıl'],
  ['Russian',
    'Какие лучшие рестораны в Мадриде',
    'Порекомендуйте места для еды в Мадриде',
    'Какая погода в Барселоне'],
  ['Japanese',
    'マドリードの最高のレストランはどこですか',
    'マドリードで食事ができる場所を教えてください',
    'バルセロナの天気はどうですか'],
  ['Chinese',
    '马德里最好的餐厅是哪些',
    '推荐在马德里吃饭的地方',
    '巴塞罗那的天气怎么样'],
  ['Arabic',
    'ما هي أفضل المطاعم في مدريد',
    'أوصيني بأماكن للأكل في مدريد',
    'كيف الطقس في برشلونة']
].map(([language, original, similar, different]) => ({
  language,
  original,
  similar,
  different
}));
it('should cache and retrieve semantically similar queries across languages', async function () {
  // For every fixture row: store the original query, then expect a hit for its
  // paraphrase and a miss for the unrelated query.
  // Nothing in this loop clears the cache between languages, so entries stored
  // for earlier rows are still present when later rows are queried.
  for (const { language, original, similar, different } of languagePairs) {
    console.log(`\nTesting ${language} language:`);
    console.log(`Original query: "${original}"`);

    // Simulate a result for the original query and cache it
    const result = `Results for: ${original} (${language})`;
    await cache.set(original, result);
    console.log(`Cached result for original query`);

    // Test with a semantically similar query
    console.log(`Similar query: "${similar}"`);
    const similarResult = await cache.get(similar);

    // `!= null` rejects both null and undefined. A strict notEqual against null
    // would let `undefined` through, and the `.score` access below would then
    // fail with a TypeError instead of this assertion's message.
    assert.ok(similarResult != null,
      `Should find a similar entry for "${similar}" in ${language}`);
    // The logged similarity is computed as 1 - score.
    console.log(`✅ Cache hit - Similarity: ${(1 - similarResult.score).toFixed(4)}`);
    console.log(`Retrieved original query: "${similarResult.query}"`);

    // Test with a semantically different query
    console.log(`Different query: "${different}"`);
    const differentResult = await cache.get(different);

    // A miss is accepted whether it is reported as null or as undefined.
    assert.ok(differentResult == null,
      `Should NOT find a similar entry for "${different}" in ${language}`);
    console.log(`✅ Cache miss for different query`);
  }
});
it('should cache and retrieve cross-language semantically similar queries', async function () {
  // Informational probe: cache one English query, then look it up through
  // translations of the same question and log whether each one hits.
  // There are deliberately no assertions, because cross-language matching
  // depends on the quality of the embedding model.
  const sourceQuery = 'What are the best restaurants in New York';
  const sourceAnswer = 'Results for best New York restaurants';

  await cache.set(sourceQuery, sourceAnswer);
  console.log(`\nCached original English query: "${sourceQuery}"`);

  // [language label, translated query]
  const translations = [
    ['Spanish', 'Cuáles son los mejores restaurantes en Nueva York'],
    ['French', 'Quels sont les meilleurs restaurants à New York'],
    ['German', 'Was sind die besten Restaurants in New York'],
    ['Italian', 'Quali sono i migliori ristoranti di New York']
  ];

  for (const [language, query] of translations) {
    console.log(`Testing ${language} query: "${query}"`);

    const hit = await cache.get(query);
    if (hit) {
      console.log(`✅ Cross-language cache hit - Similarity: ${(1 - hit.score).toFixed(4)}`);
    } else {
      console.log(`❌ Cross-language cache miss`);
    }
  }
});
it('should properly identify cache misses between unrelated queries across languages', async function () {
  // Verifies that unrelated queries in different languages do not produce
  // false cache hits: each cached query must come back with its own result,
  // never with the result stored for one of the others.
  console.log('\nTesting cache misses between unrelated queries in different languages:');

  // A set of unrelated queries in different languages, each with its own result
  const unrelatedQueries = [
    { language: 'English', query: 'How to grow tomatoes in the garden', result: 'Tomato growing guide' },
    { language: 'Spanish', query: 'Cómo reservar un vuelo a París', result: 'Reservas de vuelos a París' },
    { language: 'French', query: 'Recette pour faire du pain', result: 'Recette de pain traditionnel' },
    { language: 'German', query: 'Die besten Sehenswürdigkeiten in Berlin', result: 'Berliner Touristenattraktion' },
    { language: 'Italian', query: 'Come preparare la pasta carbonara', result: 'Ricetta pasta carbonara' }
  ];

  // Cache all the queries
  for (const { language, query, result } of unrelatedQueries) {
    await cache.set(query, result);
    console.log(`Cached ${language} query: "${query}"`);
  }

  // Look every query up exactly once. The lookups do not depend on which pair
  // is being compared, so repeating them inside the pairwise loops would only
  // multiply the number of cache round-trips.
  const matches = [];
  for (const { query, result } of unrelatedQueries) {
    const match = await cache.get(query);
    // `!= null` also rejects undefined, so a miss fails here with this message
    // rather than with a TypeError on `.result` below.
    assert.ok(match != null, `Should find exact match for "${query}"`);
    assert.equal(match.result, result, `Should return correct result for exact match`);
    matches.push(match);
  }

  // Compare every ordered pair: the entry retrieved for the other query must
  // not carry the current query's result.
  for (let i = 0; i < unrelatedQueries.length; i++) {
    const { language, query, result } = unrelatedQueries[i];

    for (let j = 0; j < unrelatedQueries.length; j++) {
      if (i === j) continue; // Skip testing against itself

      const { language: otherLanguage, query: otherQuery } = unrelatedQueries[j];
      console.log(`Testing if ${language} query "${query}" matches ${otherLanguage} query "${otherQuery}"`);

      assert.notEqual(matches[j].result, result, `"${otherQuery}" should not match "${query}"`);
      console.log(`✅ Verified no false match between different queries`);
    }
  }
});
});
});