@n8n/n8n-nodes-langchain
Version:

119 lines • 4.13 kB
JavaScript
;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var token_estimator_exports = {};
__export(token_estimator_exports, {
estimateTextSplitsByTokens: () => estimateTextSplitsByTokens,
estimateTokensByCharCount: () => estimateTokensByCharCount,
estimateTokensFromStringList: () => estimateTokensFromStringList
});
module.exports = __toCommonJS(token_estimator_exports);
var import_tiktoken = require("./tiktoken");
var import_helpers = require("../helpers");
const MODEL_CHAR_PER_TOKEN_RATIOS = {
"gpt-4o": 3.8,
"gpt-4": 4,
"gpt-3.5-turbo": 4,
cl100k_base: 4,
o200k_base: 3.5,
p50k_base: 4.2,
r50k_base: 4.2
};
function estimateTokensByCharCount(text, model = "cl100k_base") {
try {
if (!text || typeof text !== "string" || text.length === 0) {
return 0;
}
const charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4;
if (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {
const estimatedTokens2 = Math.ceil(text.length / 4);
return estimatedTokens2;
}
const estimatedTokens = Math.ceil(text.length / charsPerToken);
return estimatedTokens;
} catch (error) {
return Math.ceil((text?.length || 0) / 4);
}
}
function estimateTextSplitsByTokens(text, chunkSize, chunkOverlap, model = "cl100k_base") {
try {
if (!text || typeof text !== "string" || text.length === 0) {
return [];
}
if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
return [text];
}
const validOverlap = Number.isFinite(chunkOverlap) && chunkOverlap >= 0 ? Math.min(chunkOverlap, chunkSize - 1) : 0;
const charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4;
const chunkSizeInChars = Math.floor(chunkSize * charsPerToken);
const overlapInChars = Math.floor(validOverlap * charsPerToken);
const chunks = [];
let start = 0;
while (start < text.length) {
const end = Math.min(start + chunkSizeInChars, text.length);
chunks.push(text.slice(start, end));
if (end >= text.length) {
break;
}
start = Math.max(end - overlapInChars, start + 1);
}
return chunks;
} catch (error) {
return text ? [text] : [];
}
}
async function estimateTokensFromStringList(list, model) {
try {
if (!Array.isArray(list)) {
return 0;
}
const encoder = await (0, import_tiktoken.encodingForModel)(model);
const encodedListLength = await Promise.all(
list.map(async (text) => {
try {
if (!text || typeof text !== "string") {
return 0;
}
if ((0, import_helpers.hasLongSequentialRepeat)(text)) {
const estimatedTokens = estimateTokensByCharCount(text, model);
return estimatedTokens;
}
try {
const tokens = encoder.encode(text);
return tokens.length;
} catch (encodingError) {
return estimateTokensByCharCount(text, model);
}
} catch (itemError) {
return 0;
}
})
);
const totalTokens = encodedListLength.reduce((acc, curr) => acc + curr, 0);
return totalTokens;
} catch (error) {
return 0;
}
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
estimateTextSplitsByTokens,
estimateTokensByCharCount,
estimateTokensFromStringList
});
//# sourceMappingURL=token-estimator.js.map