polyfact
Version:
<h1 align="center">PolyFact</h1>
67 lines (66 loc) • 2.15 kB
JavaScript
/**
 * Array-spread helper: returns a new array made of `to` followed by the
 * elements of `from`. Reuses `this.__spreadArray` when one already exists.
 * NOTE(review): the shape matches the helper a TypeScript build emits for
 * spread syntax; presumably generated rather than hand-written — confirm.
 *
 * @param {Array} to - Leading elements; not mutated (the result comes from `concat`).
 * @param {ArrayLike} from - Elements to append.
 * @param {boolean} [pack] - When truthy, or when only two arguments are passed,
 *   missing indices in `from` are materialised as explicit `undefined` entries.
 * @returns {Array} The combined array.
 */
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
// `ar` stays undefined until the first missing index is found, so a dense
// `from` never pays for the element-by-element copy.
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
if (ar || !(i in from)) {
// First hole: start the copy with everything seen so far.
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
ar[i] = from[i];
}
}
// Fall back to a plain slice of `from` when no hole-filling copy was made.
return to.concat(ar || Array.prototype.slice.call(from));
};
// Set the `__esModule` flag on the exports object (the conventional marker of
// transpiled ES-module output).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the exported names up front; the functions are assigned further down.
exports.splitString = exports.tokenCount = void 0;
// Tokenizer dependency; the visible code only uses its `getEncoding` function.
var js_tiktoken_1 = require("js-tiktoken");
/**
 * Stirling's approximation of the factorial: (n / e)^n * sqrt(2 * pi * n).
 * Works for non-integer `n` as well; yields 0 for n = 0 and NaN for negative n.
 *
 * @param {number} n - Value whose factorial is approximated.
 * @returns {number} The approximation.
 */
function stirling(n) {
    var root = Math.sqrt(2.0 * Math.PI * n);
    var power = Math.pow(n / Math.E, n);
    return power * root;
}
/**
 * Bell-shaped weight for position `curr` out of `max`.
 *
 * The ratio curr / max is mapped linearly onto k = 28 * (curr / max) + 1, and
 * the result is the square root of a binomial-style term
 * C(30, k) * 0.5^k * 0.5^(30 - k), with the coefficient built from Stirling
 * approximations. The weight is largest when curr is half of max.
 *
 * @param {number} curr - Current position.
 * @param {number} max - Total length; 0 produces NaN.
 * @returns {number} The weight for this position.
 */
function binomialScore(curr, max) {
    var trials = 30.0;
    var position = (trials - 2.0) * (curr / max) + 1;
    var coefficient = stirling(trials) / (stirling(position) * stirling(trials - position));
    var mass = coefficient * Math.pow(0.5, position) * Math.pow(0.5, trials - position);
    return Math.sqrt(mass);
}
/**
 * Newline-based weight for the character at index `i` of `s`.
 *
 * @param {string} s - Text being examined.
 * @param {number} i - Index of the character to weigh.
 * @returns {number} 1 when s[i] is not a newline or is the final character,
 *   50 when the newline is immediately followed by another newline,
 *   5 for any other newline.
 */
function newLineScore(s, i) {
    var isInteriorNewline = s[i] === "\n" && s.length !== i + 1;
    if (!isInteriorNewline) {
        return 1.0;
    }
    return s[i + 1] === "\n" ? 50.0 : 5.0;
}
/**
 * Splits `s` in two at the highest-scoring index.
 *
 * Each index is scored as binomialScore(i, s.length) * newLineScore(s, i),
 * then multiplied by 50 when s[i] is a newline ending a line that does not
 * start with a tab, and by 2 when s[i] is a space. The character at the chosen
 * index becomes the first character of the right half.
 *
 * Index 0 is never chosen: it would give an empty left half and a right half
 * equal to `s`, which makes a caller that recurses on both halves loop forever
 * (this happened for text starting with "\n\n" followed by an unbroken run).
 *
 * @param {string} s - Text to split.
 * @returns {string[]} `[left, right]` with `left + right === s`. Both halves
 *   are non-empty whenever `s.length >= 2`; shorter input yields `["", s]`.
 */
function binarySplit(s) {
    var bestScore = 0.0;
    var bestIndex = 0;
    var lastNewLine = -1;
    for (var i = 0; i < s.length; i++) {
        var score = binomialScore(i, s.length) * newLineScore(s, i);
        if (s[i] === "\n") {
            // s[lastNewLine + 1] is the first character of the line this newline
            // terminates (or the newline itself when that line is empty).
            if (s[lastNewLine + 1] !== "\t") {
                score *= 50.0;
            }
            lastNewLine = i;
        }
        if (s[i] === " ") {
            score *= 2.0;
        }
        // Index 0 is still scanned so lastNewLine is tracked, but it is not a
        // valid split point.
        if (i > 0 && score > bestScore) {
            bestScore = score;
            bestIndex = i;
        }
    }
    // No index scored above zero (e.g. NaN scores): fall back to the midpoint
    // so the split still makes progress.
    if (bestIndex === 0 && s.length > 1) {
        bestIndex = Math.floor(s.length / 2);
    }
    return [s.slice(0, bestIndex), s.slice(bestIndex)];
}
// Encoders already created by tokenCount, keyed by encoding name. Created with
// a null prototype so any encoding name is a safe key.
var tokenCountEncoders = Object.create(null);
/**
 * Counts the tokens `s` encodes to under the given encoding.
 *
 * The encoder for each encoding name is created once and reused, because this
 * function is called repeatedly (splitString calls it at every recursion
 * level) and getEncoding presumably rebuilds its tables on each call.
 * Errors thrown by getEncoding or encode propagate unchanged, and a failed
 * lookup is not cached.
 *
 * @param {string} s - Text to tokenize.
 * @param {string} [model="cl100k_base"] - Encoding name passed to getEncoding.
 * @returns {number} Number of tokens in `s`.
 */
function tokenCount(s, model) {
    if (model === void 0) { model = "cl100k_base"; }
    var enc = tokenCountEncoders[model];
    if (enc === undefined) {
        enc = (0, js_tiktoken_1.getEncoding)(model);
        tokenCountEncoders[model] = enc;
    }
    return enc.encode(s).length;
}
// Expose tokenCount on the CommonJS exports object.
exports.tokenCount = tokenCount;
/**
 * Recursively splits `s` into consecutive pieces whose token count is below
 * `maxToken`. Concatenating the returned pieces reproduces `s`.
 *
 * Termination is guaranteed: a string of length 0 or 1 is returned as-is even
 * if it does not satisfy the token limit (it cannot be divided further), and
 * if binarySplit hands back an empty half the string is cut at its midpoint
 * instead, so every recursive call works on a strictly shorter string.
 *
 * @param {string} s - Text to split.
 * @param {number} maxToken - Exclusive upper bound on tokens per piece.
 * @returns {string[]} The pieces, in order.
 */
function splitString(s, maxToken) {
    if (tokenCount(s) < maxToken) {
        return [s];
    }
    // Nothing left to divide; returning here avoids unbounded recursion.
    if (s.length <= 1) {
        return [s];
    }
    var halves = binarySplit(s);
    var left = halves[0];
    var right = halves[1];
    if (left.length === 0 || right.length === 0) {
        // A lopsided split would recurse on the same string again.
        var middle = Math.floor(s.length / 2);
        left = s.slice(0, middle);
        right = s.slice(middle);
    }
    return splitString(left, maxToken).concat(splitString(right, maxToken));
}
// Expose splitString on the CommonJS exports object.
exports.splitString = splitString;
;