UNPKG

polyfact

Version:

<h1 align="center">PolyFact</h1>

67 lines (66 loc) 2.15 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitString = exports.tokenCount = void 0;
var js_tiktoken_1 = require("js-tiktoken");

/** Number of trials of the binomial distribution used to weight split positions. */
var BINOMIAL_N = 30.0;

/**
 * Encoders already built by `getEncoding`, keyed by encoding name.
 * Created without a prototype so that names such as "constructor" cannot
 * collide with inherited properties.
 */
var encodingCache = Object.create(null);

/**
 * Stirling's approximation of n!: (n / e)^n * sqrt(2 * pi * n).
 * Unlike an exact factorial it also accepts non-integer `n`.
 *
 * @param {number} n - Positive number.
 * @returns {number} Approximation of n!.
 */
function stirling(n) {
    return Math.pow((n / Math.E), n) * Math.sqrt(2.0 * Math.PI * n);
}

/** stirling(BINOMIAL_N) never changes, so compute it once instead of once per character. */
var STIRLING_N = stirling(BINOMIAL_N);

/**
 * Positional weight of a split candidate.
 *
 * `curr / max` is mapped linearly onto k in [1, n - 1] and the result is the
 * square root of the (Stirling-approximated) binomial probability
 * C(n, k) * 0.5^k * 0.5^(n - k). The weight therefore peaks when `curr` is in
 * the middle of the string and falls off towards both ends.
 *
 * @param {number} curr - Candidate index.
 * @param {number} max - Length of the string being split.
 * @returns {number} Positive weight.
 */
function binomialScore(curr, max) {
    var n = BINOMIAL_N;
    var k = (n - 2.0) * (curr / max) + 1;
    return Math.sqrt((STIRLING_N / (stirling(k) * stirling(n - k))) * Math.pow(0.5, k) * Math.pow(0.5, (n - k)));
}

/**
 * Newline multiplier for the character at index `i`.
 *
 * @param {string} s - Text being split.
 * @param {number} i - Index to inspect.
 * @returns {number} 1 when `s[i]` is not a newline or is the last character,
 *   50 when it is a newline directly followed by another newline,
 *   5 for any other newline.
 */
function newLineScore(s, i) {
    if (s.length === i + 1 || s[i] !== "\n") {
        return 1.0;
    }
    if (s[i + 1] === "\n") {
        return 50.0;
    }
    return 5.0;
}

/**
 * Tells whether cutting `s` right before index `i` would separate the two
 * halves of a UTF-16 surrogate pair.
 *
 * @param {string} s - Text being split.
 * @param {number} i - Candidate cut index (must be >= 1).
 * @returns {boolean} True when `s[i - 1]` is a high surrogate and `s[i]` a low one.
 */
function splitsSurrogatePair(s, i) {
    var before = s.charCodeAt(i - 1);
    var after = s.charCodeAt(i);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Cuts `s` in two at the highest-scoring index.
 *
 * Every index gets the positional weight from `binomialScore`, multiplied by
 * the newline weight, by 50 more when it is a newline ending a line whose
 * first character is not a tab, and by 2 when it is a space. The first index
 * reaching the maximum wins. The character at the chosen index becomes the
 * first character of the right-hand part.
 *
 * Index 0 and indices inside a surrogate pair are never chosen: the former
 * would return an empty left part (no progress for the caller), the latter
 * would produce lone surrogates.
 *
 * @param {string} s - Text to cut.
 * @returns {string[]} `[left, right]` with `left + right === s`. `left` is
 *   empty only when no valid cut exists (fewer than two characters, or a
 *   single surrogate pair).
 */
function binarySplit(s) {
    var bestScore = 0.0;
    var bestIndex = 0;
    var lastNewLine = -1;
    for (var i = 0; i < s.length; i++) {
        var ch = s[i];
        var score = binomialScore(i, s.length) * newLineScore(s, i);
        if (ch === "\n") {
            // s[lastNewLine + 1] is the first character of the line this newline ends.
            if (s[lastNewLine + 1] !== "\t") {
                score *= 50.0;
            }
            lastNewLine = i;
        }
        if (ch === " ") {
            score *= 2.0;
        }
        // The newline bookkeeping above must still run for skipped indices.
        if (i === 0 || splitsSurrogatePair(s, i)) {
            continue;
        }
        if (score > bestScore) {
            bestScore = score;
            bestIndex = i;
        }
    }
    return [s.slice(0, bestIndex), s.slice(bestIndex)];
}

/**
 * Counts the tokens `s` encodes to.
 *
 * The encoder for each encoding name is built once and reused, because
 * `splitString` calls this function for every chunk it examines.
 *
 * @param {string} s - Text to tokenize.
 * @param {string} [model="cl100k_base"] - Encoding name passed to js-tiktoken's `getEncoding`.
 * @returns {number} Number of tokens.
 */
function tokenCount(s, model) {
    if (model === void 0) {
        model = "cl100k_base";
    }
    var enc = encodingCache[model];
    if (enc === undefined) {
        enc = (0, js_tiktoken_1.getEncoding)(model);
        encodingCache[model] = enc;
    }
    return enc.encode(s).length;
}
exports.tokenCount = tokenCount;

/**
 * Splits `s` into consecutive chunks, each with fewer than `maxToken` tokens
 * (counted with the default encoding), preferring to cut at blank lines,
 * newlines and spaces near the middle of the text.
 *
 * Concatenating the returned chunks yields `s` again. A piece that cannot be
 * cut any further (a single character or a single surrogate pair) is returned
 * as-is even if it is over budget, so the function always terminates.
 *
 * @param {string} s - Text to split.
 * @param {number} maxToken - Exclusive upper bound on tokens per chunk.
 * @returns {string[]} Chunks in their original order.
 */
function splitString(s, maxToken) {
    var chunks = [];
    // Explicit stack instead of recursion: depth no longer depends on how
    // unbalanced the individual cuts are.
    var pending = [s];
    while (pending.length > 0) {
        var current = pending.pop();
        if (tokenCount(current) < maxToken) {
            chunks.push(current);
            continue;
        }
        var parts = binarySplit(current);
        var left = parts[0];
        var right = parts[1];
        if (left.length === 0 || right.length === 0) {
            // No valid cut exists; emitting the piece avoids looping forever.
            chunks.push(current);
            continue;
        }
        // Push right first so that left is processed first and order is kept.
        pending.push(right, left);
    }
    return chunks;
}
exports.splitString = splitString;