/*
 * @langchain/community
 * Version: (not captured in this listing)
 * Third-party integrations for LangChain.js
 * 584 lines (583 loc) • 25.1 kB
 * JavaScript
 */
;
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AzureAISearchVectorStore = exports.AzureAISearchQueryType = void 0;
const uuid = __importStar(require("uuid"));
const search_documents_1 = require("@azure/search-documents");
const vectorstores_1 = require("@langchain/core/vectorstores");
const documents_1 = require("@langchain/core/documents");
const math_1 = require("@langchain/core/utils/math");
const env_1 = require("@langchain/core/utils/env");
/**
* Azure AI Search query type.
*/
exports.AzureAISearchQueryType = {
/** Vector search. */
Similarity: "similarity",
/** Hybrid full text and vector search. */
SimilarityHybrid: "similarity_hybrid",
/** Hybrid full text and vector search with semantic ranking. */
SemanticHybrid: "semantic_hybrid",
};
// User-agent prefix passed to every Azure SDK client this module creates.
const USER_AGENT_PREFIX = "langchainjs-azure-aisearch";
// Index field names used by the generated index definition and by queries.
// Key field of the index.
const DEFAULT_FIELD_ID = "id";
// Searchable text field; mapped to `Document.pageContent` when reading results.
const DEFAULT_FIELD_CONTENT = "content";
// Vector field targeted by every vector query.
const DEFAULT_FIELD_CONTENT_VECTOR = "content_vector";
// Complex field spread into `Document.metadata` when reading results.
const DEFAULT_FIELD_METADATA = "metadata";
// Sub-field of the metadata field: string `source`.
const DEFAULT_FIELD_METADATA_SOURCE = "source";
// Sub-field of the metadata field: collection of `{ key, value }` entries.
const DEFAULT_FIELD_METADATA_ATTRS = "attributes";
/** Name of the semantic configuration created in the index and used by semantic queries. */
const SEMANTIC_CONFIGURATION_NAME = "semantic-search-config";
/**
 * Runs one indexing operation through a short-lived buffered sender.
 *
 * The sender is always disposed, including when the operation or the flush
 * fails; previously a failure skipped `dispose()` entirely.
 * @param client Search client the sender writes through.
 * @param operationName Operation label used in the failure message
 * (`deleteDocuments` or `uploadDocuments`).
 * @param work Callback that enqueues documents on the sender.
 * @returns The per-document results reported by successful batches.
 */
async function runWithBufferedSender(client, operationName, work) {
    const sender = new search_documents_1.SearchIndexingBufferedSender(client, (entity) => entity.id);
    const succeeded = [];
    sender.on("batchSucceeded", (response) => {
        succeeded.push(...response.results);
    });
    sender.on("batchFailed", (response) => {
        throw new Error(`Azure AI Search ${operationName} batch failed: ${response}`);
    });
    try {
        await work(sender);
        await sender.flush();
    }
    catch (error) {
        try {
            await sender.dispose();
        }
        catch {
            // The original failure is the one worth reporting; a secondary
            // error raised while cleaning up must not replace it.
        }
        throw error;
    }
    await sender.dispose();
    return succeeded;
}
/**
 * Builds the search options shared by all vector-based queries.
 * @param vector Query embedding.
 * @param k Number of nearest neighbors requested and maximum results returned.
 * @param filter Optional filter options (`filterExpression`, `vectorFilterMode`).
 * @returns Options object suitable for `SearchClient.search`.
 */
function buildVectorSearchOptions(vector, k, filter) {
    return {
        vectorSearchOptions: {
            queries: [
                {
                    kind: "vector",
                    vector,
                    kNearestNeighborsCount: k,
                    fields: [DEFAULT_FIELD_CONTENT_VECTOR],
                },
            ],
            filterMode: filter?.vectorFilterMode,
        },
        filter: filter?.filterExpression,
        top: k,
    };
}
/**
 * Drains a search result iterator into `[Document, score]` tuples.
 * @param results Async iterable of search results.
 * @param includeEmbeddings When truthy, the stored vector is copied to
 * `metadata.embedding` of each returned document.
 * @returns The documents paired with their search score.
 */
async function collectScoredDocuments(results, includeEmbeddings) {
    const docsWithScore = [];
    for await (const item of results) {
        const document = new documents_1.Document({
            pageContent: item.document[DEFAULT_FIELD_CONTENT],
            metadata: {
                ...item.document[DEFAULT_FIELD_METADATA],
            },
        });
        if (includeEmbeddings) {
            document.metadata.embedding =
                item.document[DEFAULT_FIELD_CONTENT_VECTOR];
        }
        docsWithScore.push([document, item.score]);
    }
    return docsWithScore;
}
/**
 * Azure AI Search vector store.
 * To use this, you should have:
 * - the `@azure/search-documents` NPM package installed
 * - an endpoint and key to the Azure AI Search instance
 *
 * If you directly provide a `SearchClient` instance, you need to ensure that
 * an index has been created. When using an endpoint and key, the index will
 * be created automatically if it does not exist.
 */
class AzureAISearchVectorStore extends vectorstores_1.VectorStore {
    /** Maps secret constructor fields to the environment variables that hold them. */
    get lc_secrets() {
        return {
            endpoint: "AZURE_AISEARCH_ENDPOINT",
            key: "AZURE_AISEARCH_KEY",
        };
    }
    /** Identifier of this vector store implementation. */
    _vectorstoreType() {
        return "azure_aisearch";
    }
    /**
     * @param embeddings Embeddings used to vectorize documents and queries.
     * @param config Store configuration: either a ready `client`, or an
     * `endpoint` plus `key`/`credentials` (endpoint and key fall back to the
     * `AZURE_AISEARCH_ENDPOINT` / `AZURE_AISEARCH_KEY` environment variables).
     * Optional `indexName` (default `"vectorsearch"`) and `search` options.
     * @throws Error when neither a client nor endpoint and key/credentials are available.
     */
    constructor(embeddings, config) {
        super(embeddings, config);
        // Declare the instance fields up front, as the compiled class fields did.
        for (const field of ["initPromise", "client", "indexName", "options"]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0,
            });
        }
        const endpoint = config.endpoint ?? (0, env_1.getEnvironmentVariable)("AZURE_AISEARCH_ENDPOINT");
        const key = config.key ?? (0, env_1.getEnvironmentVariable)("AZURE_AISEARCH_KEY");
        let { credentials } = config;
        if (!config.client && (!endpoint || (!key && !credentials))) {
            throw new Error("Azure AI Search client or endpoint and key/credentials must be set.");
        }
        this.indexName = config.indexName ?? "vectorsearch";
        if (config.client) {
            this.client = config.client;
        }
        else {
            const clientOptions = {
                userAgentOptions: { userAgentPrefix: USER_AGENT_PREFIX },
            };
            credentials ??= new search_documents_1.AzureKeyCredential(key);
            this.client = new search_documents_1.SearchClient(endpoint, this.indexName, credentials, clientOptions);
            const indexClient = new search_documents_1.SearchIndexClient(endpoint, credentials, {
                userAgentOptions: { userAgentPrefix: USER_AGENT_PREFIX },
            });
            // Start initialization, but don't wait for it to finish here.
            // Failures are logged rather than rethrown, so awaiting
            // `initPromise` later never rejects.
            this.initPromise = this.ensureIndexExists(indexClient).catch((error) => {
                console.error("Error during Azure AI Search index initialization:", error);
            });
        }
        this.options = config.search ?? {};
        this.embeddings = embeddings;
    }
    /**
     * Removes specified documents from the AzureAISearchVectorStore using IDs or a filter.
     * When both are given, deletion by IDs runs first, then deletion by filter.
     * @param params Object that includes either an array of IDs or a filter for the data to be deleted.
     * @returns A promise that resolves when the documents have been removed.
     * @throws Error when neither `ids` nor `filter` is set.
     */
    async delete(params) {
        if (!params.ids && !params.filter) {
            throw new Error(`Azure AI Search delete requires either "ids" or "filter" to be set in the params object`);
        }
        await this.initPromise;
        if (params.ids) {
            await this.deleteById(params.ids);
        }
        if (params.filter) {
            await this.deleteMany(params.filter);
        }
    }
    /**
     * Removes specified documents from the AzureAISearchVectorStore using a filter.
     * @param filter Filter options to find documents to delete.
     * @returns A promise that resolves to the indexing results of the deleted documents.
     * @throws Error when `filter.filterExpression` is not set.
     */
    async deleteMany(filter) {
        if (!filter.filterExpression) {
            throw new Error(`Azure AI Search deleteMany requires "filterExpression" to be set in the filter object`);
        }
        // This method is public, so wait for index initialization here too.
        await this.initPromise;
        // NOTE(review): no `top` is passed to this search; confirm that
        // iterating `results` yields every matching document and not only
        // the service's default result page.
        const { results } = await this.client.search("*", {
            filter: filter.filterExpression,
        });
        const docs = [];
        for await (const item of results) {
            docs.push(item.document);
        }
        return runWithBufferedSender(this.client, "deleteDocuments", (sender) => sender.deleteDocuments(docs));
    }
    /**
     * Removes specified documents from the AzureAISearchVectorStore.
     * @param ids ID or array of IDs of the documents to be removed.
     * @returns A promise that resolves to the indexing results of the deleted documents.
     */
    async deleteById(ids) {
        const docs = (Array.isArray(ids) ? ids : [ids]).map((id) => ({ id }));
        // This method is public, so wait for index initialization here too.
        await this.initPromise;
        return runWithBufferedSender(this.client, "deleteDocuments", (sender) => sender.deleteDocuments(docs));
    }
    /**
     * Adds documents to the AzureAISearchVectorStore.
     * @param documents The documents to add.
     * @param options Options for adding documents.
     * @returns A promise that resolves to the ids of the added documents.
     */
    async addDocuments(documents, options) {
        const texts = documents.map((doc) => doc.pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents, options);
    }
    /**
     * Adds vectors to the AzureAISearchVectorStore.
     * @param vectors Vectors to be added.
     * @param documents Corresponding documents to be added.
     * @param options Options for adding documents; `options.ids` supplies the
     * document ids, otherwise a UUID v4 is generated per document.
     * @returns A promise that resolves to the ids of the added documents.
     */
    async addVectors(vectors, documents, options) {
        const ids = options?.ids ?? documents.map(() => uuid.v4());
        const entities = documents.map((doc, idx) => ({
            id: ids[idx],
            content: doc.pageContent,
            content_vector: vectors[idx],
            metadata: {
                source: doc.metadata?.source,
                attributes: doc.metadata?.attributes ?? [],
            },
        }));
        await this.initPromise;
        await runWithBufferedSender(this.client, "uploadDocuments", (sender) => sender.uploadDocuments(entities));
        return ids;
    }
    /**
     * Performs a similarity search using query type specified in configuration.
     * If the query type is not specified, it defaults to similarity hybrid search.
     * @param query Query text for the similarity search.
     * @param k=4 Number of nearest neighbors to return.
     * @param filter Optional filter options for the documents.
     * @returns Promise that resolves to a list of documents.
     */
    async similaritySearch(query, k = 4, filter = undefined) {
        const scored = await this.similaritySearchWithScore(query, k, filter);
        return scored.map(([doc]) => doc);
    }
    /**
     * Performs a similarity search using query type specified in configuration.
     * If the query type is not specified, it defaults to similarity hybrid search.
     * @param query Query text for the similarity search.
     * @param k=4 Number of nearest neighbors to return.
     * @param filter Optional filter options for the documents.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     * @throws Error when the configured search type is not recognized.
     */
    async similaritySearchWithScore(query, k = 4, filter = undefined) {
        const { Similarity, SimilarityHybrid, SemanticHybrid } = exports.AzureAISearchQueryType;
        const searchType = this.options.type ?? SimilarityHybrid;
        switch (searchType) {
            case Similarity:
                return this.similaritySearchVectorWithScore(await this.embeddings.embedQuery(query), k, filter);
            case SimilarityHybrid:
                return this.hybridSearchVectorWithScore(query, await this.embeddings.embedQuery(query), k, filter);
            case SemanticHybrid:
                return this.semanticHybridSearchVectorWithScore(query, await this.embeddings.embedQuery(query), k, filter);
            default:
                throw new Error(`Unrecognized search type '${searchType}'`);
        }
    }
    /**
     * Performs a hybrid search using query text.
     * @param query Query text for the similarity search.
     * @param queryVector Query vector for the similarity search.
     * If not provided, the query text will be embedded.
     * @param k=4 Number of nearest neighbors to return.
     * @param filter Optional filter options for the documents.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     */
    async hybridSearchVectorWithScore(query, queryVector, k = 4, filter = undefined) {
        const vector = queryVector ?? (await this.embeddings.embedQuery(query));
        await this.initPromise;
        const { results } = await this.client.search(query, buildVectorSearchOptions(vector, k, filter));
        return collectScoredDocuments(results, filter?.includeEmbeddings);
    }
    /**
     * Performs a hybrid search with semantic reranker using query text.
     * @param query Query text for the similarity search.
     * @param queryVector Query vector for the similarity search.
     * If not provided, the query text will be embedded.
     * @param k=4 Number of nearest neighbors to return.
     * @param filter Optional filter options for the documents.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     */
    async semanticHybridSearchVectorWithScore(query, queryVector, k = 4, filter = undefined) {
        const vector = queryVector ?? (await this.embeddings.embedQuery(query));
        await this.initPromise;
        const { results } = await this.client.search(query, {
            ...buildVectorSearchOptions(vector, k, filter),
            queryType: "semantic",
            semanticSearchOptions: {
                configurationName: SEMANTIC_CONFIGURATION_NAME,
            },
        });
        return collectScoredDocuments(results, filter?.includeEmbeddings);
    }
    /**
     * Performs a similarity search on the vectors stored in the collection.
     * The result count is capped at `k` (`top: k`), matching the hybrid
     * search variants.
     * @param query Query vector for the similarity search.
     * @param k Number of nearest neighbors to return.
     * @param filter Optional filter options for the documents.
     * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        await this.initPromise;
        const { results } = await this.client.search("*", buildVectorSearchOptions(query, k, filter));
        return collectScoredDocuments(results, filter?.includeEmbeddings);
    }
    /**
     * Return documents selected using the maximal marginal relevance.
     * Maximal marginal relevance optimizes for similarity to the query AND
     * diversity among selected documents.
     * @param query Text to look up documents similar to.
     * @param options.k Number of documents to return.
     * @param options.fetchK=20 Number of documents to fetch before passing to
     * the MMR algorithm.
     * @param options.lambda=0.5 Number between 0 and 1 that determines the
     * degree of diversity among the results, where 0 corresponds to maximum
     * diversity and 1 to minimum diversity.
     * @param options.filter Optional filter options for the documents.
     * @returns List of documents selected by maximal marginal relevance.
     */
    async maxMarginalRelevanceSearch(query, options) {
        const { k, fetchK = 20, lambda = 0.5 } = options;
        const keepEmbeddings = options.filter?.includeEmbeddings || false;
        const queryEmbedding = await this.embeddings.embedQuery(query);
        // Embeddings are always fetched because MMR needs them for re-ranking.
        const candidates = await this.similaritySearchVectorWithScore(queryEmbedding, fetchK, {
            ...options.filter,
            includeEmbeddings: true,
        });
        const candidateEmbeddings = candidates.map(([doc]) => doc.metadata.embedding);
        // Re-rank the results using MMR
        const selectedIndexes = (0, math_1.maximalMarginalRelevance)(queryEmbedding, candidateEmbeddings, lambda, k);
        return selectedIndexes.map((index) => {
            const [doc] = candidates[index];
            // Remove embeddings if they were not requested originally
            if (!keepEmbeddings) {
                delete doc.metadata.embedding;
            }
            return doc;
        });
    }
    /**
     * Ensures that an index exists on the AzureAISearchVectorStore.
     * Any failure to fetch the index is treated as "index missing" and
     * triggers index creation.
     * @param indexClient The Azure AI Search index client.
     * @returns A promise that resolves when the AzureAISearchVectorStore index has been initialized.
     * @protected
     */
    async ensureIndexExists(indexClient) {
        try {
            await indexClient.getIndex(this.indexName);
        }
        catch (e) {
            // Index does not exist, create it
            const searchIndex = await this.createSearchIndexDefinition(this.indexName);
            await indexClient.createIndex(searchIndex);
        }
    }
    /**
     * Prepares the search index definition for Azure AI Search.
     * The vector dimension is taken from the length of a probe embedding.
     * @param indexName The name of the index.
     * @returns The SearchIndex object.
     * @protected
     */
    async createSearchIndexDefinition(indexName) {
        // Embed a test query to get the embedding dimensions
        const testEmbedding = await this.embeddings.embedQuery("test");
        const embeddingDimensions = testEmbedding.length;
        const algorithmName = "vector-search-algorithm";
        const profileName = "vector-search-profile";
        return {
            name: indexName,
            vectorSearch: {
                algorithms: [
                    {
                        name: algorithmName,
                        kind: "hnsw",
                        parameters: {
                            m: 4,
                            efSearch: 500,
                            metric: "cosine",
                            efConstruction: 400,
                        },
                    },
                ],
                profiles: [
                    {
                        name: profileName,
                        algorithmConfigurationName: algorithmName,
                    },
                ],
            },
            semanticSearch: {
                defaultConfigurationName: SEMANTIC_CONFIGURATION_NAME,
                configurations: [
                    {
                        name: SEMANTIC_CONFIGURATION_NAME,
                        prioritizedFields: {
                            contentFields: [{ name: DEFAULT_FIELD_CONTENT }],
                            keywordsFields: [{ name: DEFAULT_FIELD_CONTENT }],
                        },
                    },
                ],
            },
            fields: [
                {
                    name: DEFAULT_FIELD_ID,
                    filterable: true,
                    key: true,
                    type: "Edm.String",
                },
                {
                    name: DEFAULT_FIELD_CONTENT,
                    searchable: true,
                    filterable: true,
                    type: "Edm.String",
                },
                {
                    name: DEFAULT_FIELD_CONTENT_VECTOR,
                    searchable: true,
                    type: "Collection(Edm.Single)",
                    vectorSearchDimensions: embeddingDimensions,
                    vectorSearchProfileName: profileName,
                },
                {
                    name: DEFAULT_FIELD_METADATA,
                    type: "Edm.ComplexType",
                    fields: [
                        {
                            name: DEFAULT_FIELD_METADATA_SOURCE,
                            type: "Edm.String",
                            filterable: true,
                        },
                        {
                            name: DEFAULT_FIELD_METADATA_ATTRS,
                            type: "Collection(Edm.ComplexType)",
                            fields: [
                                {
                                    name: "key",
                                    type: "Edm.String",
                                    filterable: true,
                                },
                                {
                                    name: "value",
                                    type: "Edm.String",
                                    filterable: true,
                                },
                            ],
                        },
                    ],
                },
            ],
        };
    }
    /**
     * Static method to create an instance of AzureAISearchVectorStore from a
     * list of texts. It first converts the texts to vectors and then adds
     * them to the collection.
     * @param texts List of texts to be converted to vectors.
     * @param metadatas Metadata for the texts: an array with one entry per
     * text, or a single object shared by every text.
     * @param embeddings Embeddings to be used for conversion.
     * @param config Database configuration for Azure AI Search.
     * @returns Promise that resolves to a new instance of AzureAISearchVectorStore.
     */
    static async fromTexts(texts, metadatas, embeddings, config) {
        const docs = texts.map((text, i) => new documents_1.Document({
            pageContent: text,
            metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas,
        }));
        return AzureAISearchVectorStore.fromDocuments(docs, embeddings, config);
    }
    /**
     * Static method to create an instance of AzureAISearchVectorStore from a
     * list of documents. It first converts the documents to vectors and then
     * adds them to the database.
     * @param docs List of documents to be converted to vectors.
     * @param embeddings Embeddings to be used for conversion.
     * @param config Database configuration for Azure AI Search.
     * @param options Options forwarded to `addDocuments` (for example `ids`).
     * @returns Promise that resolves to a new instance of AzureAISearchVectorStore.
     */
    static async fromDocuments(docs, embeddings, config, options) {
        const instance = new this(embeddings, config);
        await instance.addDocuments(docs, options);
        return instance;
    }
}
exports.AzureAISearchVectorStore = AzureAISearchVectorStore;