UNPKG

@langchain/community

Version:
369 lines (368 loc) 14.2 kB
/* eslint-disable no-process-env */ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { jest, test, expect } from "@jest/globals"; import { setTimeout } from "timers/promises"; import { SearchIndexClient, AzureKeyCredential } from "@azure/search-documents"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; import { FakeEmbeddings } from "@langchain/core/utils/testing"; import { AzureAISearchVectorStore, AzureAISearchQueryType, } from "../azure_aisearch.js"; const INDEX_NAME = "vectorsearch"; const DOCUMENT_IDS = ["1", "2", "3", "4"]; /* * To run these tests, you need have an Azure AI Search instance running. * You can deploy a free version on Azure Portal without any cost, following * this guide: * https://learn.microsoft.com/azure/search/search-create-service-portal * * Once you have the instance running, you need to set the following environment * variables before running the test: * - AZURE_AISEARCH_ENDPOINT * - AZURE_AISEARCH_KEY * - AZURE_OPENAI_API_KEY * - AZURE_OPENAI_API_INSTANCE_NAME * - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME * - AZURE_OPENAI_API_VERSION * * A regular OpenAI key can also be used instead of Azure OpenAI. */ describe.skip("AzureAISearchVectorStore e2e integration tests", () => { let indexClient; beforeEach(async () => { expect(process.env.AZURE_AISEARCH_ENDPOINT).toBeDefined(); expect(process.env.AZURE_AISEARCH_KEY).toBeDefined(); // Note: when using Azure OpenAI, you have to also set these variables // in addition to the API key: // - AZURE_OPENAI_API_INSTANCE_NAME // - AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME // - AZURE_OPENAI_API_VERSION expect(process.env.OPENAI_API_KEY || process.env.AZURE_OPENAI_API_KEY).toBeDefined(); // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const endpoint = process.env.AZURE_AISEARCH_ENDPOINT; // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const credential = new AzureKeyCredential(process.env.AZURE_AISEARCH_KEY); indexClient = new SearchIndexClient(endpoint, credential); try { await indexClient.deleteIndex(INDEX_NAME); } catch (e) { // Ignore if documents or index do not exist } }); afterAll(async () => { try { await indexClient.deleteIndex(INDEX_NAME); } catch (e) { // Ignore } }); test("performs similarity search", async () => { const vectorStore = new AzureAISearchVectorStore(new OpenAIEmbeddings(), { indexName: INDEX_NAME, search: { type: AzureAISearchQueryType.SemanticHybrid, }, }); expect(vectorStore).toBeDefined(); await vectorStore.addDocuments([ { pageContent: "This book is about politics", metadata: { source: "doc1", attributes: [{ key: "a", value: "1" }], }, }, { pageContent: "Cats sleeps a lot.", metadata: { source: "doc2", attributes: [{ key: "b", value: "1" }], }, }, { pageContent: "Sandwiches taste good.", metadata: { source: "doc3", attributes: [{ key: "c", value: "1" }], }, }, { pageContent: "The house is open", metadata: { source: "doc4", attributes: [ { key: "d", value: "1" }, { key: "e", value: "2" }, ], }, }, ], { ids: DOCUMENT_IDS }); // Wait for the documents to be indexed await setTimeout(1000); const results = await vectorStore.similaritySearch("sandwich", 1); expect(results.length).toEqual(1); expect(results).toMatchObject([ { pageContent: "Sandwiches taste good.", metadata: { source: "doc3", attributes: [{ key: "c", value: "1" }], }, }, ]); const retriever = vectorStore.asRetriever({}); const docs = await retriever.getRelevantDocuments("house"); expect(docs).toBeDefined(); expect(docs[0]).toMatchObject({ pageContent: "The house is open", metadata: { source: "doc4", attributes: [ { key: "d", value: "1" }, { key: "e", value: "2" }, ], }, }); }); test("performs max marginal relevance search", async () => { const texts = ["foo", "foo", "fox"]; const vectorStore = await AzureAISearchVectorStore.fromTexts(texts, {}, new OpenAIEmbeddings(), { indexName: INDEX_NAME, search: { type: "similarity", }, }); // Wait for the documents to be indexed await setTimeout(1000); const output = await vectorStore.maxMarginalRelevanceSearch("foo", { k: 10, fetchK: 20, lambda: 0.1, }); expect(output).toHaveLength(texts.length); const actual = output.map((doc) => doc.pageContent); const expected = ["foo", "fox", "foo"]; expect(actual).toEqual(expected); const standardRetriever = await vectorStore.asRetriever(); const standardRetrieverOutput = await standardRetriever.getRelevantDocuments("foo"); expect(output).toHaveLength(texts.length); const standardRetrieverActual = standardRetrieverOutput.map((doc) => doc.pageContent); const standardRetrieverExpected = ["foo", "foo", "fox"]; expect(standardRetrieverActual).toEqual(standardRetrieverExpected); const retriever = await vectorStore.asRetriever({ searchType: "mmr", searchKwargs: { fetchK: 20, lambda: 0.1, }, }); const retrieverOutput = await retriever.getRelevantDocuments("foo"); expect(output).toHaveLength(texts.length); const retrieverActual = retrieverOutput.map((doc) => doc.pageContent); const retrieverExpected = ["foo", "fox", "foo"]; expect(retrieverActual).toEqual(retrieverExpected); const similarity = await vectorStore.similaritySearchWithScore("foo", 1); expect(similarity.length).toBe(1); }); }); describe.skip("AzureAISearchVectorStore integration tests", () => { const embeddings = new FakeEmbeddings(); let indexClient; const embedMock = jest .spyOn(FakeEmbeddings.prototype, "embedDocuments") .mockImplementation(async (documents) => documents.map(() => Array(1536).fill(0.2))); const queryMock = jest .spyOn(FakeEmbeddings.prototype, "embedQuery") .mockImplementation(async () => Array(1536).fill(0.2)); beforeEach(() => { embedMock.mockClear(); queryMock.mockClear(); }); beforeAll(async () => { expect(process.env.AZURE_AISEARCH_ENDPOINT).toBeDefined(); expect(process.env.AZURE_AISEARCH_KEY).toBeDefined(); indexClient = new SearchIndexClient(process.env.AZURE_AISEARCH_ENDPOINT, new AzureKeyCredential(process.env.AZURE_AISEARCH_KEY)); try { await indexClient.deleteIndex(INDEX_NAME); } catch (e) { // Ignore } }); afterAll(async () => { try { await indexClient.deleteIndex(INDEX_NAME); } catch (e) { // Ignore } }); test("test index creation if not exists", async () => { const newName = "index-undefined"; try { await indexClient.deleteIndex(newName); } catch (e) { // Ignore } const store = new AzureAISearchVectorStore(embeddings, { indexName: newName, search: { type: AzureAISearchQueryType.Similarity, }, }); await store.addDocuments([ { pageContent: "foo", metadata: { source: "bar", }, }, ]); const index = await indexClient.getIndex(newName); expect(index).toBeDefined(); // Cleanup try { await indexClient.deleteIndex(newName); } catch (e) { // Ignore } }); test("test add document", async () => { const id = new Date().getTime().toString(); const store = new AzureAISearchVectorStore(embeddings, { indexName: INDEX_NAME, search: { type: AzureAISearchQueryType.Similarity, }, }); const result = await store.addDocuments([ new Document({ pageContent: "test index document upload text", metadata: { source: "test", }, }), ], { ids: [id], }); expect(result).toHaveLength(1); }); test("test search document", async () => { const store = await AzureAISearchVectorStore.fromTexts(["test index document upload text"], [], embeddings, { indexName: INDEX_NAME, }); // Need to wait a bit for the document to be indexed await setTimeout(1000); const docs = await store.similaritySearch("test", 1); expect(docs).toHaveLength(1); expect(docs[0].metadata.embeddings).not.toBeDefined(); }); test("test search document with included embeddings", async () => { const store = await AzureAISearchVectorStore.fromTexts(["test index document upload text"], [], embeddings, { indexName: INDEX_NAME, }); // Need to wait a bit for the document to be indexed await setTimeout(1000); const docs = await store.similaritySearch("test", 1, { includeEmbeddings: true, }); expect(docs).toHaveLength(1); expect(docs[0].metadata.embedding).toBeDefined(); }); test("test search document with filter", async () => { const store = await AzureAISearchVectorStore.fromTexts(["test index document upload text"], [ { source: "filter-test", attributes: [{ key: "abc", value: "def" }], }, ], embeddings, { indexName: INDEX_NAME, }); // Need to wait a bit for the document to be indexed await setTimeout(1000); const bySource = await store.similaritySearch("test", 1, { filterExpression: "metadata/source eq 'filter-test'", }); const byAttr = await store.similaritySearch("test", 1, { filterExpression: "metadata/attributes/any(t: t/key eq 'abc' and t/value eq 'def')", }); expect(bySource).toHaveLength(1); expect(byAttr).toHaveLength(1); }); test("test search document with query key", async () => { const store = new AzureAISearchVectorStore(embeddings, { indexName: INDEX_NAME, }); const result = await store.similaritySearch("test", 1); // Need to wait a bit for the document to be indexed await setTimeout(1000); expect(result).toBeDefined(); }); test("test delete documents by id", async () => { const id = new Date().getTime().toString(); const store = new AzureAISearchVectorStore(embeddings, { indexName: INDEX_NAME, }); await store.addDocuments([ new Document({ pageContent: "test index document upload text", metadata: { source: "deleteById", }, }), ], { ids: [id], }); // Need to wait a bit for the document to be indexed await setTimeout(1000); await store.delete({ ids: id }); // Wait a bit for the index to be updated await setTimeout(1000); const docs = await store.similaritySearch("test", 1, { filterExpression: "metadata/source eq 'deleteById'", }); expect(docs).toHaveLength(0); }); test("test delete documents by filter", async () => { const id = new Date().getTime().toString(); const source = `test-${id}`; const store = new AzureAISearchVectorStore(embeddings, { indexName: INDEX_NAME, }); await store.addDocuments([ new Document({ pageContent: "test index document upload text", metadata: { source, }, }), ]); // Need to wait a bit for the document to be indexed await setTimeout(1000); await store.delete({ filter: { filterExpression: `metadata/source eq '${source}'`, }, }); // Wait a bit for the index to be updated await setTimeout(1000); const docs = await store.similaritySearch("test", 1, { filterExpression: `metadata/source eq '${source}'`, }); expect(docs).toHaveLength(0); }); test("test connect with custom credentials", async () => { const store = await AzureAISearchVectorStore.fromTexts(["test index document upload text"], [], embeddings, { indexName: INDEX_NAME, credentials: new AzureKeyCredential(process.env.AZURE_AISEARCH_KEY), }); // Need to wait a bit for the document to be indexed await setTimeout(1000); const docs = await store.similaritySearch("test", 1); expect(docs).toHaveLength(1); expect(docs[0].metadata.embeddings).not.toBeDefined(); }); });