@n8n/n8n-nodes-langchain

![Banner image](https://user-images.githubusercontent.com/10284570/173569848-c624317f-42b1-45a6-ab09-f0ea3c247648.png)

`token-estimator.js` (119 lines, 4.13 kB)
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var token_estimator_exports = {}; __export(token_estimator_exports, { estimateTextSplitsByTokens: () => estimateTextSplitsByTokens, estimateTokensByCharCount: () => estimateTokensByCharCount, estimateTokensFromStringList: () => estimateTokensFromStringList }); module.exports = __toCommonJS(token_estimator_exports); var import_tiktoken = require("./tiktoken"); var import_helpers = require("../helpers"); const MODEL_CHAR_PER_TOKEN_RATIOS = { "gpt-4o": 3.8, "gpt-4": 4, "gpt-3.5-turbo": 4, cl100k_base: 4, o200k_base: 3.5, p50k_base: 4.2, r50k_base: 4.2 }; function estimateTokensByCharCount(text, model = "cl100k_base") { try { if (!text || typeof text !== "string" || text.length === 0) { return 0; } const charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4; if (!Number.isFinite(charsPerToken) || charsPerToken <= 0) { const estimatedTokens2 = Math.ceil(text.length / 4); return estimatedTokens2; } const estimatedTokens = Math.ceil(text.length / charsPerToken); return estimatedTokens; } catch (error) { return Math.ceil((text?.length || 0) / 4); } } function estimateTextSplitsByTokens(text, chunkSize, chunkOverlap, model = "cl100k_base") { try { if (!text || typeof text !== "string" || text.length === 0) { return []; } if (!Number.isFinite(chunkSize) || chunkSize <= 0) { return [text]; } const validOverlap = Number.isFinite(chunkOverlap) && chunkOverlap >= 0 ? Math.min(chunkOverlap, chunkSize - 1) : 0; const charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4; const chunkSizeInChars = Math.floor(chunkSize * charsPerToken); const overlapInChars = Math.floor(validOverlap * charsPerToken); const chunks = []; let start = 0; while (start < text.length) { const end = Math.min(start + chunkSizeInChars, text.length); chunks.push(text.slice(start, end)); if (end >= text.length) { break; } start = Math.max(end - overlapInChars, start + 1); } return chunks; } catch (error) { return text ? 
[text] : []; } } async function estimateTokensFromStringList(list, model) { try { if (!Array.isArray(list)) { return 0; } const encoder = await (0, import_tiktoken.encodingForModel)(model); const encodedListLength = await Promise.all( list.map(async (text) => { try { if (!text || typeof text !== "string") { return 0; } if ((0, import_helpers.hasLongSequentialRepeat)(text)) { const estimatedTokens = estimateTokensByCharCount(text, model); return estimatedTokens; } try { const tokens = encoder.encode(text); return tokens.length; } catch (encodingError) { return estimateTokensByCharCount(text, model); } } catch (itemError) { return 0; } }) ); const totalTokens = encodedListLength.reduce((acc, curr) => acc + curr, 0); return totalTokens; } catch (error) { return 0; } } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { estimateTextSplitsByTokens, estimateTokensByCharCount, estimateTokensFromStringList }); //# sourceMappingURL=token-estimator.js.map
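
For context, a minimal usage sketch of the three exports follows. The `require` path is an assumption made for illustration (the actual location of this compiled file inside the installed package may differ); the function names, parameters, and fallback behaviour come from the code above.

```js
// Hypothetical require path, shown only for illustration; the real location of
// this compiled file inside the installed package may differ.
const {
  estimateTokensByCharCount,
  estimateTextSplitsByTokens,
  estimateTokensFromStringList,
} = require("@n8n/n8n-nodes-langchain/dist/utils/tokenizer/token-estimator");

const longText = "Lorem ipsum ".repeat(200);

// Character-count heuristic: ~4 chars per token for cl100k_base.
// "Hello, world!" is 13 characters, so this returns Math.ceil(13 / 4) === 4.
const tokens = estimateTokensByCharCount("Hello, world!", "cl100k_base");

// Chunks of roughly 100 tokens with a 10-token overlap, i.e. 400-character
// chunks with a 40-character overlap at the default 4-chars-per-token ratio.
const chunks = estimateTextSplitsByTokens(longText, 100, 10, "cl100k_base");

// Sums token counts for a list of strings, using the real tiktoken encoder
// where it can and the character heuristic for degenerate input (async).
estimateTokensFromStringList(["first chunk", "second chunk"], "cl100k_base")
  .then((total) => console.log(tokens, chunks.length, total));
```

The character-based helpers avoid loading a tokenizer at all, which keeps quick size estimates and pre-splitting cheap, while `estimateTokensFromStringList` does exact encoding where feasible and only falls back to the heuristic when encoding fails or the input contains long sequential repeats.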