UNPKG

@langchain/openai

Version:
196 lines (195 loc) 7.24 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.OpenAIEmbeddings = void 0; const openai_1 = require("openai"); const env_1 = require("@langchain/core/utils/env"); const embeddings_1 = require("@langchain/core/embeddings"); const chunk_array_1 = require("@langchain/core/utils/chunk_array"); const azure_js_1 = require("./utils/azure.cjs"); const openai_js_1 = require("./utils/openai.cjs"); /** * Class for generating embeddings using the OpenAI API. * * To use with Azure, import the `AzureOpenAIEmbeddings` class. * * @example * ```typescript * // Embed a query using OpenAIEmbeddings to generate embeddings for a given text * const model = new OpenAIEmbeddings(); * const res = await model.embedQuery( * "What would be a good company name for a company that makes colorful socks?", * ); * console.log({ res }); * * ``` */ class OpenAIEmbeddings extends embeddings_1.Embeddings { constructor(fields) { const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; super(fieldsWithDefaults); Object.defineProperty(this, "model", { enumerable: true, configurable: true, writable: true, value: "text-embedding-ada-002" }); /** @deprecated Use "model" instead */ Object.defineProperty(this, "modelName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "batchSize", { enumerable: true, configurable: true, writable: true, value: 512 }); // TODO: Update to `false` on next minor release (see: https://github.com/langchain-ai/langchainjs/pull/3612) Object.defineProperty(this, "stripNewLines", { enumerable: true, configurable: true, writable: true, value: true }); /** * The number of dimensions the resulting output embeddings should have. * Only supported in `text-embedding-3` and later models. */ Object.defineProperty(this, "dimensions", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "timeout", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "organization", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "client", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "clientConfig", { enumerable: true, configurable: true, writable: true, value: void 0 }); const apiKey = fieldsWithDefaults?.apiKey ?? fieldsWithDefaults?.openAIApiKey ?? (0, env_1.getEnvironmentVariable)("OPENAI_API_KEY"); this.organization = fieldsWithDefaults?.configuration?.organization ?? (0, env_1.getEnvironmentVariable)("OPENAI_ORGANIZATION"); this.model = fieldsWithDefaults?.model ?? fieldsWithDefaults?.modelName ?? this.model; this.modelName = this.model; this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; this.stripNewLines = fieldsWithDefaults?.stripNewLines ?? this.stripNewLines; this.timeout = fieldsWithDefaults?.timeout; this.dimensions = fieldsWithDefaults?.dimensions; this.clientConfig = { apiKey, organization: this.organization, dangerouslyAllowBrowser: true, ...fields?.configuration, }; } /** * Method to generate embeddings for an array of documents. Splits the * documents into batches and makes requests to the OpenAI API to generate * embeddings. * @param texts Array of documents to generate embeddings for. * @returns Promise that resolves to a 2D array of embeddings for each document. */ async embedDocuments(texts) { const batches = (0, chunk_array_1.chunkArray)(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize); const batchRequests = batches.map((batch) => { const params = { model: this.model, input: batch, }; if (this.dimensions) { params.dimensions = this.dimensions; } return this.embeddingWithRetry(params); }); const batchResponses = await Promise.all(batchRequests); const embeddings = []; for (let i = 0; i < batchResponses.length; i += 1) { const batch = batches[i]; const { data: batchResponse } = batchResponses[i]; for (let j = 0; j < batch.length; j += 1) { embeddings.push(batchResponse[j].embedding); } } return embeddings; } /** * Method to generate an embedding for a single document. Calls the * embeddingWithRetry method with the document as the input. * @param text Document to generate an embedding for. * @returns Promise that resolves to an embedding for the document. */ async embedQuery(text) { const params = { model: this.model, input: this.stripNewLines ? text.replace(/\n/g, " ") : text, }; if (this.dimensions) { params.dimensions = this.dimensions; } const { data } = await this.embeddingWithRetry(params); return data[0].embedding; } /** * Private method to make a request to the OpenAI API to generate * embeddings. Handles the retry logic and returns the response from the * API. * @param request Request to send to the OpenAI API. * @returns Promise that resolves to the response from the API. */ async embeddingWithRetry(request) { if (!this.client) { const openAIEndpointConfig = { baseURL: this.clientConfig.baseURL, }; const endpoint = (0, azure_js_1.getEndpoint)(openAIEndpointConfig); const params = { ...this.clientConfig, baseURL: endpoint, timeout: this.timeout, maxRetries: 0, }; if (!params.baseURL) { delete params.baseURL; } this.client = new openai_1.OpenAI(params); } const requestOptions = {}; return this.caller.call(async () => { try { const res = await this.client.embeddings.create(request, requestOptions); return res; } catch (e) { const error = (0, openai_js_1.wrapOpenAIClientError)(e); throw error; } }); } } exports.OpenAIEmbeddings = OpenAIEmbeddings;