@langchain/community
Version:
Third-party integrations for LangChain.js
1 lines • 5.19 kB
Source Map (JSON)
{"version":3,"file":"huggingface_transformers.cjs","names":["Embeddings"],"sources":["../../src/embeddings/huggingface_transformers.ts"],"sourcesContent":["import type {\n PretrainedOptions,\n FeatureExtractionPipelineOptions,\n FeatureExtractionPipeline,\n} from \"@huggingface/transformers\";\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport { chunkArray } from \"@langchain/core/utils/chunk_array\";\n\nexport interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams {\n /** Model name to use */\n model: string;\n\n /**\n * Timeout to use when making requests to OpenAI.\n */\n timeout?: number;\n\n /**\n * The maximum number of documents to embed in a single request.\n */\n batchSize?: number;\n\n /**\n * Whether to strip new lines from the input text. This is recommended by\n * OpenAI, but may not be suitable for all use cases.\n */\n stripNewLines?: boolean;\n\n /**\n * Optional parameters for the pretrained model.\n */\n pretrainedOptions?: PretrainedOptions;\n\n /**\n * Optional parameters for the pipeline.\n */\n pipelineOptions?: FeatureExtractionPipelineOptions;\n}\n\n/**\n * @example\n * ```typescript\n * const model = new HuggingFaceTransformersEmbeddings({\n * model: \"Xenova/all-MiniLM-L6-v2\",\n * });\n *\n * // Embed a single query\n * const res = await model.embedQuery(\n * \"What would be a good company name for a company that makes colorful socks?\"\n * );\n * console.log({ res });\n *\n * // Embed multiple documents\n * const documentRes = await model.embedDocuments([\"Hello world\", \"Bye bye\"]);\n * console.log({ documentRes });\n * ```\n */\nexport class HuggingFaceTransformersEmbeddings\n extends Embeddings\n implements HuggingFaceTransformersEmbeddingsParams\n{\n model = \"Xenova/all-MiniLM-L6-v2\";\n\n batchSize = 512;\n\n stripNewLines = true;\n\n timeout?: number;\n\n pretrainedOptions?: PretrainedOptions;\n\n pipelineOptions?: FeatureExtractionPipelineOptions;\n\n private pipelinePromise: Promise<FeatureExtractionPipeline> | null = null;\n\n constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>) {\n super(fields ?? {});\n\n this.model = fields?.model ?? this.model;\n this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;\n this.timeout = fields?.timeout;\n this.pretrainedOptions = fields?.pretrainedOptions ?? {};\n this.pipelineOptions = {\n pooling: \"mean\",\n normalize: true,\n ...fields?.pipelineOptions,\n };\n }\n\n async embedDocuments(texts: string[]): Promise<number[][]> {\n const batches = chunkArray(\n this.stripNewLines ? texts.map((t) => t.replace(/\\n/g, \" \")) : texts,\n this.batchSize\n );\n\n const batchRequests = batches.map((batch) => this.runEmbedding(batch));\n const batchResponses = await Promise.all(batchRequests);\n const embeddings: number[][] = [];\n\n for (let i = 0; i < batchResponses.length; i += 1) {\n const batchResponse = batchResponses[i];\n for (let j = 0; j < batchResponse.length; j += 1) {\n embeddings.push(batchResponse[j]);\n }\n }\n\n return embeddings;\n }\n\n async embedQuery(text: string): Promise<number[]> {\n const data = await this.runEmbedding([\n this.stripNewLines ? text.replace(/\\n/g, \" \") : text,\n ]);\n return data[0];\n }\n\n private async runEmbedding(texts: string[]) {\n if (!this.pipelinePromise) {\n this.pipelinePromise = (async () => {\n const transformers = await import(\"@huggingface/transformers\");\n const pipeline = transformers.pipeline;\n const result = await pipeline(\n \"feature-extraction\",\n this.model,\n this.pretrainedOptions\n );\n return result as FeatureExtractionPipeline;\n })();\n }\n\n const pipe = await this.pipelinePromise;\n return this.caller.call(async () => {\n const output = await pipe(texts, this.pipelineOptions);\n return output.tolist();\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AAyDA,IAAa,oCAAb,cACUA,2BAAAA,WAEV;CACE,QAAQ;CAER,YAAY;CAEZ,gBAAgB;CAEhB;CAEA;CAEA;CAEA,kBAAqE;CAErE,YAAY,QAA2D;AACrE,QAAM,UAAU,EAAE,CAAC;AAEnB,OAAK,QAAQ,QAAQ,SAAS,KAAK;AACnC,OAAK,gBAAgB,QAAQ,iBAAiB,KAAK;AACnD,OAAK,UAAU,QAAQ;AACvB,OAAK,oBAAoB,QAAQ,qBAAqB,EAAE;AACxD,OAAK,kBAAkB;GACrB,SAAS;GACT,WAAW;GACX,GAAG,QAAQ;GACZ;;CAGH,MAAM,eAAe,OAAsC;EAMzD,MAAM,iBAAA,GAAA,kCAAA,YAJJ,KAAK,gBAAgB,MAAM,KAAK,MAAM,EAAE,QAAQ,OAAO,IAAI,CAAC,GAAG,OAC/D,KAAK,UACN,CAE6B,KAAK,UAAU,KAAK,aAAa,MAAM,CAAC;EACtE,MAAM,iBAAiB,MAAM,QAAQ,IAAI,cAAc;EACvD,MAAM,aAAyB,EAAE;AAEjC,OAAK,IAAI,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;GACjD,MAAM,gBAAgB,eAAe;AACrC,QAAK,IAAI,IAAI,GAAG,IAAI,cAAc,QAAQ,KAAK,EAC7C,YAAW,KAAK,cAAc,GAAG;;AAIrC,SAAO;;CAGT,MAAM,WAAW,MAAiC;AAIhD,UAHa,MAAM,KAAK,aAAa,CACnC,KAAK,gBAAgB,KAAK,QAAQ,OAAO,IAAI,GAAG,KACjD,CAAC,EACU;;CAGd,MAAc,aAAa,OAAiB;AAC1C,MAAI,CAAC,KAAK,gBACR,MAAK,mBAAmB,YAAY;GAElC,MAAM,YADe,MAAM,OAAO,8BACJ;AAM9B,UALe,MAAM,SACnB,sBACA,KAAK,OACL,KAAK,kBACN;MAEC;EAGN,MAAM,OAAO,MAAM,KAAK;AACxB,SAAO,KAAK,OAAO,KAAK,YAAY;AAElC,WADe,MAAM,KAAK,OAAO,KAAK,gBAAgB,EACxC,QAAQ;IACtB"}