@langchain/community
Version:
Third-party integrations for LangChain.js
1 lines • 7.37 kB
Source Map (JSON)
{"version":3,"file":"jina.cjs","names":["Embeddings"],"sources":["../../src/embeddings/jina.ts"],"sourcesContent":["import { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport { chunkArray } from \"@langchain/core/utils/chunk_array\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\n\nexport interface JinaEmbeddingsParams extends EmbeddingsParams {\n /** Model name to use */\n model:\n | \"jina-clip-v2\"\n | \"jina-embeddings-v3\"\n | \"jina-colbert-v2\"\n | \"jina-clip-v1\"\n | \"jina-colbert-v1-en\"\n | \"jina-embeddings-v2-base-es\"\n | \"jina-embeddings-v2-base-code\"\n | \"jina-embeddings-v2-base-de\"\n | \"jina-embeddings-v2-base-zh\"\n | \"jina-embeddings-v2-base-en\"\n | string;\n\n baseUrl?: string;\n\n /**\n * Timeout to use when making requests to Jina.\n */\n timeout?: number;\n\n /**\n * The maximum number of documents to embed in a single request.\n */\n batchSize?: number;\n\n /**\n * Whether to strip new lines from the input text.\n */\n stripNewLines?: boolean;\n\n /**\n * The dimensions of the embedding.\n */\n dimensions?: number;\n\n /**\n * Scales the embedding so its Euclidean (L2) norm becomes 1, preserving direction. Useful when downstream involves dot-product, classification, visualization..\n */\n normalized?: boolean;\n}\n\ntype JinaMultiModelInput =\n | {\n text: string;\n image?: never;\n }\n | {\n image: string;\n text?: never;\n };\n\nexport type JinaEmbeddingsInput = string | JinaMultiModelInput;\n\ninterface EmbeddingCreateParams {\n model: JinaEmbeddingsParams[\"model\"];\n\n /**\n * input can be strings or JinaMultiModelInputs,if you want embed image,you should use JinaMultiModelInputs\n */\n input: JinaEmbeddingsInput[];\n dimensions: number;\n task: \"retrieval.query\" | \"retrieval.passage\";\n normalized?: boolean;\n}\n\ninterface EmbeddingResponse {\n model: string;\n object: string;\n usage: {\n total_tokens: number;\n prompt_tokens: number;\n };\n data: {\n object: string;\n index: number;\n embedding: number[];\n }[];\n}\n\ninterface EmbeddingErrorResponse {\n detail: string;\n}\n\nexport class JinaEmbeddings extends Embeddings implements JinaEmbeddingsParams {\n model: JinaEmbeddingsParams[\"model\"] = \"jina-clip-v2\";\n\n batchSize = 24;\n\n baseUrl = \"https://api.jina.ai/v1/embeddings\";\n\n stripNewLines = true;\n\n dimensions = 1024;\n\n apiKey: string;\n\n normalized = true;\n\n constructor(\n fields?: Partial<JinaEmbeddingsParams> & {\n apiKey?: string;\n }\n ) {\n const fieldsWithDefaults = { maxConcurrency: 2, ...fields };\n super(fieldsWithDefaults);\n\n const apiKey =\n fieldsWithDefaults?.apiKey ||\n getEnvironmentVariable(\"JINA_API_KEY\") ||\n getEnvironmentVariable(\"JINA_AUTH_TOKEN\");\n\n if (!apiKey) throw new Error(\"Jina API key not found\");\n\n this.apiKey = apiKey;\n\n this.model = fieldsWithDefaults?.model ?? this.model;\n this.dimensions = fieldsWithDefaults?.dimensions ?? this.dimensions;\n this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;\n this.stripNewLines =\n fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;\n this.normalized = fieldsWithDefaults?.normalized ?? this.normalized;\n }\n\n private doStripNewLines(input: JinaEmbeddingsInput[]) {\n if (this.stripNewLines) {\n return input.map((i) => {\n if (typeof i === \"string\") {\n return i.replace(/\\n/g, \" \");\n }\n if (i.text) {\n return { text: i.text.replace(/\\n/g, \" \") };\n }\n return i;\n });\n }\n return input;\n }\n\n async embedDocuments(input: JinaEmbeddingsInput[]): Promise<number[][]> {\n const batches = chunkArray(this.doStripNewLines(input), this.batchSize);\n const batchRequests = batches.map((batch) => {\n const params = this.getParams(batch);\n return this.embeddingWithRetry(params);\n });\n\n const batchResponses = await Promise.all(batchRequests);\n const embeddings: number[][] = [];\n\n for (let i = 0; i < batchResponses.length; i += 1) {\n const batch = batches[i];\n const batchResponse = batchResponses[i] || [];\n for (let j = 0; j < batch.length; j += 1) {\n embeddings.push(batchResponse[j]);\n }\n }\n\n return embeddings;\n }\n\n async embedQuery(input: JinaEmbeddingsInput): Promise<number[]> {\n const params = this.getParams(this.doStripNewLines([input]), true);\n\n const embeddings = (await this.embeddingWithRetry(params)) || [[]];\n return embeddings[0];\n }\n\n private getParams(\n input: JinaEmbeddingsInput[],\n query?: boolean\n ): EmbeddingCreateParams {\n return {\n model: this.model,\n input,\n dimensions: this.dimensions,\n task: query ? \"retrieval.query\" : \"retrieval.passage\",\n normalized: this.normalized,\n };\n }\n\n private async embeddingWithRetry(body: EmbeddingCreateParams) {\n const response = await fetch(this.baseUrl, {\n method: \"POST\",\n headers: {\n \"Content-Type\": \"application/json\",\n Authorization: `Bearer ${this.apiKey}`,\n },\n body: JSON.stringify(body),\n });\n const embeddingData: EmbeddingResponse | EmbeddingErrorResponse =\n await response.json();\n if (\"detail\" in embeddingData && embeddingData.detail) {\n throw new Error(`${embeddingData.detail}`);\n }\n return (embeddingData as EmbeddingResponse).data.map(\n ({ embedding }) => embedding\n );\n }\n}\n"],"mappings":";;;;;;;AAyFA,IAAa,iBAAb,cAAoCA,2BAAAA,WAA2C;CAC7E,QAAuC;CAEvC,YAAY;CAEZ,UAAU;CAEV,gBAAgB;CAEhB,aAAa;CAEb;CAEA,aAAa;CAEb,YACE,QAGA;EACA,MAAM,qBAAqB;GAAE,gBAAgB;GAAG,GAAG;GAAQ;AAC3D,QAAM,mBAAmB;EAEzB,MAAM,SACJ,oBAAoB,WAAA,GAAA,0BAAA,wBACG,eAAe,KAAA,GAAA,0BAAA,wBACf,kBAAkB;AAE3C,MAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,yBAAyB;AAEtD,OAAK,SAAS;AAEd,OAAK,QAAQ,oBAAoB,SAAS,KAAK;AAC/C,OAAK,aAAa,oBAAoB,cAAc,KAAK;AACzD,OAAK,YAAY,oBAAoB,aAAa,KAAK;AACvD,OAAK,gBACH,oBAAoB,iBAAiB,KAAK;AAC5C,OAAK,aAAa,oBAAoB,cAAc,KAAK;;CAG3D,gBAAwB,OAA8B;AACpD,MAAI,KAAK,cACP,QAAO,MAAM,KAAK,MAAM;AACtB,OAAI,OAAO,MAAM,SACf,QAAO,EAAE,QAAQ,OAAO,IAAI;AAE9B,OAAI,EAAE,KACJ,QAAO,EAAE,MAAM,EAAE,KAAK,QAAQ,OAAO,IAAI,EAAE;AAE7C,UAAO;IACP;AAEJ,SAAO;;CAGT,MAAM,eAAe,OAAmD;EACtE,MAAM,WAAA,GAAA,kCAAA,YAAqB,KAAK,gBAAgB,MAAM,EAAE,KAAK,UAAU;EACvE,MAAM,gBAAgB,QAAQ,KAAK,UAAU;GAC3C,MAAM,SAAS,KAAK,UAAU,MAAM;AACpC,UAAO,KAAK,mBAAmB,OAAO;IACtC;EAEF,MAAM,iBAAiB,MAAM,QAAQ,IAAI,cAAc;EACvD,MAAM,aAAyB,EAAE;AAEjC,OAAK,IAAI,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;GACjD,MAAM,QAAQ,QAAQ;GACtB,MAAM,gBAAgB,eAAe,MAAM,EAAE;AAC7C,QAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,EACrC,YAAW,KAAK,cAAc,GAAG;;AAIrC,SAAO;;CAGT,MAAM,WAAW,OAA+C;EAC9D,MAAM,SAAS,KAAK,UAAU,KAAK,gBAAgB,CAAC,MAAM,CAAC,EAAE,KAAK;AAGlE,UADoB,MAAM,KAAK,mBAAmB,OAAO,IAAK,CAAC,EAAE,CAAC,EAChD;;CAGpB,UACE,OACA,OACuB;AACvB,SAAO;GACL,OAAO,KAAK;GACZ;GACA,YAAY,KAAK;GACjB,MAAM,QAAQ,oBAAoB;GAClC,YAAY,KAAK;GAClB;;CAGH,MAAc,mBAAmB,MAA6B;EAS5D,MAAM,gBACJ,OATe,MAAM,MAAM,KAAK,SAAS;GACzC,QAAQ;GACR,SAAS;IACP,gBAAgB;IAChB,eAAe,UAAU,KAAK;IAC/B;GACD,MAAM,KAAK,UAAU,KAAK;GAC3B,CAAC,EAEe,MAAM;AACvB,MAAI,YAAY,iBAAiB,cAAc,OAC7C,OAAM,IAAI,MAAM,GAAG,cAAc,SAAS;AAE5C,SAAQ,cAAoC,KAAK,KAC9C,EAAE,gBAAgB,UACpB"}