UNPKG

@langchain/openai

Version:

OpenAI integrations for LangChain.js

github.com/langchain-ai/langchainjs/tree/main/libs/langchain-openai/

langchain-ai/langchainjs

192 lines (191 loc) • 7.03 kB

JavaScript

import { OpenAI as OpenAIClient } from "openai";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { Embeddings } from "@langchain/core/embeddings";
import { chunkArray } from "@langchain/core/utils/chunk_array";
import { getEndpoint } from "./utils/azure.js";
import { wrapOpenAIClientError } from "./utils/openai.js";
/**
 * Class for generating embeddings using the OpenAI API.
 *
 * To use with Azure, import the `AzureOpenAIEmbeddings` class.
 *
 * @example
 * ```typescript
 * // Embed a query using OpenAIEmbeddings to generate embeddings for a given text
 * const model = new OpenAIEmbeddings();
 * const res = await model.embedQuery(
 *   "What would be a good company name for a company that makes colorful socks?",
 * );
 * console.log({ res });
 *
 * ```
 */
export class OpenAIEmbeddings extends Embeddings {
    /**
     * Builds the embeddings wrapper. No OpenAI client is created here; it is
     * constructed lazily on the first request (see `embeddingWithRetry`).
     *
     * @param fields Optional settings. Keys read by this constructor:
     *   `apiKey` / `openAIApiKey`, `model` / `modelName`, `batchSize`,
     *   `stripNewLines`, `timeout`, `dimensions` and `configuration`
     *   (spread into the client options). The whole object, with
     *   `maxConcurrency` defaulted to 2, is also forwarded to the
     *   `Embeddings` base constructor.
     */
    constructor(fields) {
        const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
        super(fieldsWithDefaults);
        /** Name of the embedding model sent with every request. */
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "text-embedding-ada-002"
        });
        /** @deprecated Use "model" instead */
        Object.defineProperty(this, "modelName", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /** Maximum number of texts sent in a single embeddings request. */
        Object.defineProperty(this, "batchSize", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 512
        });
        // TODO: Update to `false` on next minor release (see: https://github.com/langchain-ai/langchainjs/pull/3612)
        /** When true, newline characters in inputs are replaced by spaces. */
        Object.defineProperty(this, "stripNewLines", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: true
        });
        /**
         * The number of dimensions the resulting output embeddings should have.
         * Only supported in `text-embedding-3` and later models.
         */
        Object.defineProperty(this, "dimensions", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /** Timeout passed to the OpenAI client when it is created. */
        Object.defineProperty(this, "timeout", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /** Organization passed to the OpenAI client. */
        Object.defineProperty(this, "organization", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /** Lazily created OpenAI client instance; undefined until first use. */
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /** Base options used to construct the OpenAI client. */
        Object.defineProperty(this, "clientConfig", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Explicit fields take precedence over the environment variable.
        const apiKey = fieldsWithDefaults?.apiKey ??
            fieldsWithDefaults?.openAIApiKey ??
            getEnvironmentVariable("OPENAI_API_KEY");
        this.organization =
            fieldsWithDefaults?.configuration?.organization ??
                getEnvironmentVariable("OPENAI_ORGANIZATION");
        this.model =
            fieldsWithDefaults?.model ?? fieldsWithDefaults?.modelName ?? this.model;
        // Keep the deprecated alias in sync with the resolved model.
        this.modelName = this.model;
        this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;
        this.stripNewLines =
            fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;
        this.timeout = fieldsWithDefaults?.timeout;
        this.dimensions = fieldsWithDefaults?.dimensions;
        // `configuration` is spread last, so any key it carries overrides the
        // three defaults listed before it.
        this.clientConfig = {
            apiKey,
            organization: this.organization,
            dangerouslyAllowBrowser: true,
            ...fields?.configuration,
        };
    }
    /**
     * Method to generate embeddings for an array of documents. Splits the
     * documents into batches of at most `batchSize` and makes one request per
     * batch to the OpenAI API to generate embeddings.
     * @param texts Array of documents to generate embeddings for.
     * @returns Promise that resolves to a 2D array of embeddings for each document.
     */
    async embedDocuments(texts) {
        const batches = chunkArray(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
        // All batch requests are started up front and awaited together.
        const batchRequests = batches.map((batch) => {
            const params = {
                model: this.model,
                input: batch,
            };
            // Only send `dimensions` when one was configured.
            if (this.dimensions) {
                params.dimensions = this.dimensions;
            }
            return this.embeddingWithRetry(params);
        });
        const batchResponses = await Promise.all(batchRequests);
        // Flatten the per-batch responses back into one list, reading entry j of
        // each response as the embedding for input j of the matching batch.
        const embeddings = [];
        for (let i = 0; i < batchResponses.length; i += 1) {
            const batch = batches[i];
            const { data: batchResponse } = batchResponses[i];
            for (let j = 0; j < batch.length; j += 1) {
                embeddings.push(batchResponse[j].embedding);
            }
        }
        return embeddings;
    }
    /**
     * Method to generate an embedding for a single document. Calls the
     * embeddingWithRetry method with the document as the input.
     * @param text Document to generate an embedding for.
     * @returns Promise that resolves to an embedding for the document.
     */
    async embedQuery(text) {
        const params = {
            model: this.model,
            input: this.stripNewLines ? text.replace(/\n/g, " ") : text,
        };
        // Only send `dimensions` when one was configured.
        if (this.dimensions) {
            params.dimensions = this.dimensions;
        }
        const { data } = await this.embeddingWithRetry(params);
        return data[0].embedding;
    }
    /**
     * Private method to make a request to the OpenAI API to generate
     * embeddings. Creates the OpenAI client on first use, then runs the
     * request through `this.caller.call` and returns the response from the
     * API. Errors thrown by the client are passed through
     * `wrapOpenAIClientError` before being rethrown.
     * @param request Request to send to the OpenAI API.
     * @returns Promise that resolves to the response from the API.
     */
    async embeddingWithRetry(request) {
        if (!this.client) {
            const openAIEndpointConfig = {
                baseURL: this.clientConfig.baseURL,
            };
            const endpoint = getEndpoint(openAIEndpointConfig);
            const params = {
                ...this.clientConfig,
                baseURL: endpoint,
                // Prefer the top-level `timeout` field, but fall back to a
                // `timeout` supplied through `configuration`. Writing
                // `timeout: this.timeout` unconditionally would overwrite the
                // configured value with `undefined` whenever the top-level
                // field is not set.
                timeout: this.timeout ?? this.clientConfig.timeout,
                // NOTE(review): SDK-level retries are disabled here, presumably
                // because retrying is left to `this.caller` — confirm.
                maxRetries: 0,
            };
            // Drop an empty baseURL key rather than passing a falsy value on.
            if (!params.baseURL) {
                delete params.baseURL;
            }
            this.client = new OpenAIClient(params);
        }
        const requestOptions = {};
        return this.caller.call(async () => {
            try {
                const res = await this.client.embeddings.create(request, requestOptions);
                return res;
            }
            catch (e) {
                const error = wrapOpenAIClientError(e);
                throw error;
            }
        });
    }
}