UNPKG

@langchain/community

Version:
1 lines 7.81 kB
{"version":3,"file":"alibaba_tongyi.cjs","names":["Embeddings"],"sources":["../../src/embeddings/alibaba_tongyi.ts"],"sourcesContent":["import { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport { chunkArray } from \"@langchain/core/utils/chunk_array\";\n\nexport interface AlibabaTongyiEmbeddingsParams extends EmbeddingsParams {\n /** Model name to use */\n modelName:\n | \"multimodal-embedding-v1\"\n | \"text-embedding-v1\"\n | \"text-embedding-v2\"\n | \"text-embedding-v3\"\n | \"text-embedding-v4\";\n\n /**\n * Timeout to use when making requests to AlibabaTongyi.\n */\n timeout?: number;\n\n /**\n * The maximum number of documents to embed in a single request. This is\n * limited by the AlibabaTongyi API to a maximum of 2048.\n */\n batchSize?: number;\n\n /**\n * Whether to strip new lines from the input text.\n */\n stripNewLines?: boolean;\n\n parameters?: {\n /**\n * 取值:query 或者 document,默认值为 document\n * 说明:文本转换为向量后可以应用于检索、聚类、分类等下游任务,\n * \t对检索这类非对称任务为了达到更好的检索效果建议区分查询文本(query)和\n * \t底库文本(document)类型, 聚类、分类等对称任务可以不用特殊指定,\n * \t采用系统默认值\"document\"即可\n */\n text_type?: \"query\" | \"document\";\n };\n}\n\ninterface EmbeddingCreateParams {\n model: AlibabaTongyiEmbeddingsParams[\"modelName\"];\n input: {\n texts: string[];\n };\n\n parameters?: AlibabaTongyiEmbeddingsParams[\"parameters\"];\n}\n\ninterface EmbeddingResponse {\n output: {\n embeddings: { text_index: number; embedding: number[] }[];\n };\n\n usage: {\n total_tokens: number;\n };\n\n request_id: string;\n}\n\ninterface EmbeddingErrorResponse {\n code: string;\n message: string;\n request_id: string;\n}\n\nexport class AlibabaTongyiEmbeddings\n extends Embeddings\n implements AlibabaTongyiEmbeddingsParams\n{\n modelName: AlibabaTongyiEmbeddingsParams[\"modelName\"] = \"text-embedding-v2\";\n\n batchSize = 24;\n\n stripNewLines = true;\n\n apiKey: string;\n\n parameters: EmbeddingCreateParams[\"parameters\"];\n\n constructor(\n fields?: Partial<AlibabaTongyiEmbeddingsParams> & {\n verbose?: boolean;\n apiKey?: string;\n }\n ) {\n const fieldsWithDefaults = { maxConcurrency: 2, ...fields };\n super(fieldsWithDefaults);\n\n const apiKey =\n fieldsWithDefaults?.apiKey ?? getEnvironmentVariable(\"ALIBABA_API_KEY\");\n\n if (!apiKey) throw new Error(\"AlibabaAI API key not found\");\n\n this.apiKey = apiKey;\n\n this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;\n this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;\n this.stripNewLines =\n fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;\n\n this.parameters = {\n text_type: fieldsWithDefaults?.parameters?.text_type ?? \"document\",\n };\n }\n\n /**\n * Method to generate embeddings for an array of documents. Splits the\n * documents into batches and makes requests to the AlibabaTongyi API to generate\n * embeddings.\n * @param texts Array of documents to generate embeddings for.\n * @returns Promise that resolves to a 2D array of embeddings for each document.\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n const batches = chunkArray(\n this.stripNewLines ? texts.map((t) => t.replace(/\\n/g, \" \")) : texts,\n this.batchSize\n );\n const batchRequests = batches.map((batch) => {\n const params = this.getParams(batch);\n\n return this.embeddingWithRetry(params);\n });\n\n const batchResponses = await Promise.all(batchRequests);\n const embeddings: number[][] = [];\n\n for (let i = 0; i < batchResponses.length; i += 1) {\n const batch = batches[i];\n const batchResponse = batchResponses[i] || [];\n for (let j = 0; j < batch.length; j += 1) {\n embeddings.push(batchResponse[j]);\n }\n }\n\n return embeddings;\n }\n\n /**\n * Method to generate an embedding for a single document. Calls the\n * embeddingWithRetry method with the document as the input.\n * @param text Document to generate an embedding for.\n * @returns Promise that resolves to an embedding for the document.\n */\n async embedQuery(text: string): Promise<number[]> {\n const params = this.getParams([\n this.stripNewLines ? text.replace(/\\n/g, \" \") : text,\n ]);\n\n const embeddings = (await this.embeddingWithRetry(params)) || [[]];\n return embeddings[0];\n }\n\n /**\n * Method to generate an embedding params.\n * @param texts Array of documents to generate embeddings for.\n * @returns an embedding params.\n */\n private getParams(\n texts: EmbeddingCreateParams[\"input\"][\"texts\"]\n ): EmbeddingCreateParams {\n return {\n model: this.modelName,\n input: {\n texts,\n },\n parameters: this.parameters,\n };\n }\n\n /**\n * Private method to make a request to the OpenAI API to generate\n * embeddings. Handles the retry logic and returns the response from the\n * API.\n * @param request Request to send to the OpenAI API.\n * @returns Promise that resolves to the response from the API.\n */\n private async embeddingWithRetry(body: EmbeddingCreateParams) {\n return fetch(\n \"https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding\",\n {\n method: \"POST\",\n headers: {\n \"Content-Type\": \"application/json\",\n Authorization: `Bearer ${this.apiKey}`,\n },\n body: JSON.stringify(body),\n }\n ).then(async (response) => {\n const embeddingData: EmbeddingResponse | EmbeddingErrorResponse =\n await response.json();\n\n if (\"code\" in embeddingData && embeddingData.code) {\n throw new Error(`${embeddingData.code}: ${embeddingData.message}`);\n }\n\n return (embeddingData as EmbeddingResponse).output.embeddings.map(\n ({ embedding }) => embedding\n );\n });\n }\n}\n"],"mappings":";;;;;;;AAoEA,IAAa,0BAAb,cACUA,2BAAAA,WAEV;CACE,YAAwD;CAExD,YAAY;CAEZ,gBAAgB;CAEhB;CAEA;CAEA,YACE,QAIA;EACA,MAAM,qBAAqB;GAAE,gBAAgB;GAAG,GAAG;GAAQ;AAC3D,QAAM,mBAAmB;EAEzB,MAAM,SACJ,oBAAoB,WAAA,GAAA,0BAAA,wBAAiC,kBAAkB;AAEzE,MAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,8BAA8B;AAE3D,OAAK,SAAS;AAEd,OAAK,YAAY,oBAAoB,aAAa,KAAK;AACvD,OAAK,YAAY,oBAAoB,aAAa,KAAK;AACvD,OAAK,gBACH,oBAAoB,iBAAiB,KAAK;AAE5C,OAAK,aAAa,EAChB,WAAW,oBAAoB,YAAY,aAAa,YACzD;;;;;;;;;CAUH,MAAM,eAAe,OAAsC;EACzD,MAAM,WAAA,GAAA,kCAAA,YACJ,KAAK,gBAAgB,MAAM,KAAK,MAAM,EAAE,QAAQ,OAAO,IAAI,CAAC,GAAG,OAC/D,KAAK,UACN;EACD,MAAM,gBAAgB,QAAQ,KAAK,UAAU;GAC3C,MAAM,SAAS,KAAK,UAAU,MAAM;AAEpC,UAAO,KAAK,mBAAmB,OAAO;IACtC;EAEF,MAAM,iBAAiB,MAAM,QAAQ,IAAI,cAAc;EACvD,MAAM,aAAyB,EAAE;AAEjC,OAAK,IAAI,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;GACjD,MAAM,QAAQ,QAAQ;GACtB,MAAM,gBAAgB,eAAe,MAAM,EAAE;AAC7C,QAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,EACrC,YAAW,KAAK,cAAc,GAAG;;AAIrC,SAAO;;;;;;;;CAST,MAAM,WAAW,MAAiC;EAChD,MAAM,SAAS,KAAK,UAAU,CAC5B,KAAK,gBAAgB,KAAK,QAAQ,OAAO,IAAI,GAAG,KACjD,CAAC;AAGF,UADoB,MAAM,KAAK,mBAAmB,OAAO,IAAK,CAAC,EAAE,CAAC,EAChD;;;;;;;CAQpB,UACE,OACuB;AACvB,SAAO;GACL,OAAO,KAAK;GACZ,OAAO,EACL,OACD;GACD,YAAY,KAAK;GAClB;;;;;;;;;CAUH,MAAc,mBAAmB,MAA6B;AAC5D,SAAO,MACL,2FACA;GACE,QAAQ;GACR,SAAS;IACP,gBAAgB;IAChB,eAAe,UAAU,KAAK;IAC/B;GACD,MAAM,KAAK,UAAU,KAAK;GAC3B,CACF,CAAC,KAAK,OAAO,aAAa;GACzB,MAAM,gBACJ,MAAM,SAAS,MAAM;AAEvB,OAAI,UAAU,iBAAiB,cAAc,KAC3C,OAAM,IAAI,MAAM,GAAG,cAAc,KAAK,IAAI,cAAc,UAAU;AAGpE,UAAQ,cAAoC,OAAO,WAAW,KAC3D,EAAE,gBAAgB,UACpB;IACD"}