@ai2070/l0
Version:
L0: The Missing Reliability Substrate for AI
235 lines (234 loc) • 6.72 kB
JavaScript
/**
 * Check whether a token carries any non-whitespace content.
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `true` when the token contains at least one
 *   non-whitespace character; `false` for nullish, empty, or
 *   whitespace-only tokens.
 */
function isMeaningfulToken(token) {
  if (!token || token.length === 0) {
    return false;
  }
  // trim() strips exactly the characters matched by the regex class \s, so a
  // non-empty trimmed result already proves the token is not whitespace-only;
  // no separate regex check is needed.
  return token.trim().length > 0;
}
/**
 * Check whether a piece of content contains anything other than whitespace.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when at least one non-whitespace character is
 *   present; `false` for nullish, empty, or whitespace-only content.
 */
function hasMeaningfulContent(content) {
  if (!content || content.length === 0) {
    return false;
  }
  // A string made only of \s characters trims down to "", so the length of
  // the trimmed text is the whole test; a follow-up whitespace regex could
  // never fire.
  const withoutPadding = content.trim();
  return withoutPadding.length > 0;
}
/**
 * Count the whitespace-separated tokens in a piece of content.
 *
 * @param {string} content - Text to count tokens in.
 * @returns {number} Number of non-empty tokens; `0` when the content is
 *   nullish, empty, or whitespace-only.
 */
function countMeaningfulTokens(content) {
  const usable = Boolean(content) && hasMeaningfulContent(content);
  if (!usable) {
    return 0;
  }
  const pieces = content.trim().split(/\s+/);
  let total = 0;
  for (const piece of pieces) {
    if (piece.length > 0) {
      total += 1;
    }
  }
  return total;
}
/**
 * Split content into its whitespace-separated tokens.
 *
 * @param {string} content - Text to tokenize.
 * @returns {string[]} Non-empty tokens in their original order; an empty
 *   array when the content is nullish, empty, or whitespace-only.
 */
function extractMeaningfulTokens(content) {
  const usable = Boolean(content) && hasMeaningfulContent(content);
  if (!usable) {
    return [];
  }
  const pieces = content.trim().split(/\s+/);
  return pieces.filter((piece) => piece.length > 0);
}
/**
 * Check whether a token consists solely of punctuation/symbol characters.
 *
 * A token qualifies when it contains no letters, digits, combining marks,
 * underscores, or whitespace in any script. Unicode property escapes are used
 * so that non-ASCII words such as "é" or "日本語" are not mistaken for
 * punctuation (the ASCII-only `\w` class would classify them that way).
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `true` when every character is punctuation or a symbol;
 *   `false` for nullish or empty tokens, or when any word character or
 *   whitespace is present.
 */
function isPunctuationOnly(token) {
  if (!token || token.length === 0) {
    return false;
  }
  // `_` stays excluded to match the behaviour of the ASCII `\w` class.
  return /^[^\p{L}\p{N}\p{M}_\s]+$/u.test(token);
}
/**
 * Check whether a token contains at least one ASCII letter or digit.
 *
 * Note that this is a "contains" test, not an "entirely alphanumeric" test:
 * "a!" passes because of the "a".
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `true` when any character is in `a-z`, `A-Z`, or `0-9`;
 *   `false` for nullish or empty tokens.
 */
function isAlphanumeric(token) {
  const isBlank = !token || token.length === 0;
  return isBlank ? false : /[a-zA-Z0-9]/.test(token);
}
/**
 * Produce the canonical form of a token used for comparisons.
 *
 * @param {string} token - Token to normalize; must be a string.
 * @returns {string} The token with surrounding whitespace removed and all
 *   characters lower-cased.
 */
function normalizeToken(token) {
  const stripped = token.trim();
  return stripped.toLowerCase();
}
/**
 * Compare two tokens after normalizing each with `normalizeToken`.
 *
 * @param {string} token1 - First token.
 * @param {string} token2 - Second token.
 * @returns {boolean} `true` when both tokens normalize to the same string.
 */
function tokensEqual(token1, token2) {
  const left = normalizeToken(token1);
  const right = normalizeToken(token2);
  return left === right;
}
/**
 * Find tokens whose normalized form occurs at least `threshold` times.
 *
 * Each repeated token is reported once, using the spelling of the occurrence
 * that made its count reach `threshold`.
 *
 * @param {string} content - Text to scan.
 * @param {number} [threshold=3] - Occurrence count at which a token is
 *   reported.
 * @returns {string[]} Reported tokens in the order their counts reached the
 *   threshold; empty when the content has no meaningful text.
 */
function detectRepeatedTokens(content, threshold = 3) {
  if (!content || !hasMeaningfulContent(content)) {
    return [];
  }
  const occurrences = new Map();
  const flagged = [];
  for (const word of extractMeaningfulTokens(content)) {
    const key = normalizeToken(word);
    const seen = (occurrences.get(key) || 0) + 1;
    occurrences.set(key, seen);
    // Strict equality means a token is pushed only on the occurrence that
    // hits the threshold, never again on later repeats.
    if (seen === threshold) {
      flagged.push(word);
    }
  }
  return flagged;
}
/**
 * Compute the ratio of token count to character count.
 *
 * @param {string} content - Text to measure.
 * @returns {number} Tokens per character (`countMeaningfulTokens(content)`
 *   divided by `content.length`); `0` for nullish or empty content.
 */
function calculateTokenDensity(content) {
  const charCount = content ? content.length : 0;
  if (charCount === 0) {
    return 0;
  }
  return countMeaningfulTokens(content) / charCount;
}
/**
 * Roughly estimate how many tokens a piece of content contains.
 *
 * The estimate is the rounded-up average of two heuristics: the
 * whitespace-separated word count and one token per four characters.
 *
 * @param {string} content - Text to estimate.
 * @returns {number} Estimated token count; `0` for nullish or empty content.
 */
function estimateTokenCount(content) {
  if (!content || content.length === 0) {
    return 0;
  }
  const byCharacters = Math.ceil(content.length / 4);
  const byWords = countMeaningfulTokens(content);
  return Math.ceil((byWords + byCharacters) / 2);
}
/**
 * Check whether content, ignoring leading whitespace, begins with a
 * meaningful character.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when the first character after leading
 *   whitespace passes `isMeaningfulToken`; `false` for nullish, empty, or
 *   whitespace-only content.
 */
function startsWithMeaningfulToken(content) {
  if (!content || content.length === 0) {
    return false;
  }
  // Empty string when nothing remains after stripping leading whitespace.
  const leading = content.trimStart().charAt(0);
  return leading ? isMeaningfulToken(leading) : false;
}
/**
 * Return the first whitespace-separated token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string|null} The first token, or `null` when
 *   `extractMeaningfulTokens` yields none.
 */
function getFirstMeaningfulToken(content) {
  const [head] = extractMeaningfulTokens(content);
  return head ?? null;
}
/**
 * Return the last whitespace-separated token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string|null} The final token, or `null` when
 *   `extractMeaningfulTokens` yields none.
 */
function getLastMeaningfulToken(content) {
  const words = extractMeaningfulTokens(content);
  if (words.length === 0) {
    return null;
  }
  return words[words.length - 1] ?? null;
}
/**
 * Heuristically decide whether content stops mid-thought.
 *
 * Content is treated as properly finished when its last non-whitespace
 * character is one of `. ! ? ; :` or a closing `)`, `]`, `}`.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when the trimmed content ends with none of those
 *   characters; `false` otherwise, and also for nullish, empty, or
 *   whitespace-only content.
 */
function endsAbruptly(content) {
  if (!content || !hasMeaningfulContent(content)) {
    return false;
  }
  const text = content.trim();
  if (/[.!?;:]$/.test(text)) {
    return false;
  }
  if (/[)\]}]$/.test(text)) {
    return false;
  }
  return true;
}
/**
 * Split content into chunks of at most `chunkSize` whitespace-separated
 * tokens, re-joining each chunk's tokens with single spaces.
 *
 * @param {string} content - Text to chunk.
 * @param {number} chunkSize - Maximum tokens per chunk; must be a number
 *   greater than or equal to 1.
 * @returns {string[]} Chunks in order; empty when the content is nullish,
 *   empty, or whitespace-only.
 * @throws {TypeError} If there is content to chunk and `chunkSize` is not a
 *   number.
 * @throws {RangeError} If there is content to chunk and `chunkSize` is NaN or
 *   less than 1.
 */
function chunkByTokens(content, chunkSize) {
  if (!content || !hasMeaningfulContent(content)) {
    return [];
  }
  if (typeof chunkSize !== "number") {
    throw new TypeError(
      `chunkSize must be a number, received ${typeof chunkSize}`
    );
  }
  // A step of 0 or less never advances the loop below (it would spin forever
  // while growing the result), and NaN or fractions below 1 yield empty
  // chunks, so reject them up front.
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be >= 1, received ${chunkSize}`);
  }
  const tokens = extractMeaningfulTokens(content);
  const chunks = [];
  for (let start = 0; start < tokens.length; start += chunkSize) {
    chunks.push(tokens.slice(start, start + chunkSize).join(" "));
  }
  return chunks;
}
/**
 * Build the result object returned when no overlap is found.
 *
 * @param {string} continuation - Text to report as the deduplicated output.
 * @returns {{overlapLength: number, overlapText: string,
 *   deduplicatedContinuation: string, hasOverlap: boolean}}
 */
function noOverlapResult(continuation) {
  return {
    overlapLength: 0,
    overlapText: "",
    deduplicatedContinuation: continuation,
    hasOverlap: false
  };
}

/**
 * Translate a prefix of the whitespace-normalized text back into the number
 * of characters it spans in the original text.
 *
 * @param {string} original - Text before whitespace normalization.
 * @param {string} normalizedPrefix - Matched prefix of the normalized text.
 * @returns {number} Length of the corresponding prefix of `original`.
 */
function originalLengthOfNormalizedPrefix(original, normalizedPrefix) {
  const whitespace = /\s/;
  let normalizedPos = 0;
  let originalPos = 0;
  while (normalizedPos < normalizedPrefix.length && originalPos < original.length) {
    if (!whitespace.test(original[originalPos])) {
      normalizedPos++;
      originalPos++;
      continue;
    }
    if (normalizedPrefix[normalizedPos] === " ") {
      // One normalized space stands for the whole run of original whitespace.
      normalizedPos++;
      originalPos++;
      while (originalPos < original.length && whitespace.test(original[originalPos])) {
        originalPos++;
      }
    } else {
      originalPos++;
    }
  }
  return originalPos;
}

/**
 * Detect text at the start of `continuation` that repeats the end of
 * `checkpoint`, preferring the longest match.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that follows and may repeat the tail of
 *   `checkpoint`.
 * @param {object} [options]
 * @param {number} [options.minOverlap=2] - Shortest overlap to accept, in
 *   characters. Values below 1 are treated as 1.
 * @param {number} [options.maxOverlap] - Longest overlap to consider;
 *   defaults to `Math.min(500, continuation.length)`.
 * @param {boolean} [options.caseSensitive=true] - When `false`, both texts
 *   are lower-cased before comparison.
 * @param {boolean} [options.normalizeWhitespace=false] - When `true`, every
 *   run of whitespace is compared as a single space.
 * @returns {{overlapLength: number, overlapText: string,
 *   deduplicatedContinuation: string, hasOverlap: boolean}} `overlapLength`
 *   and `overlapText` refer to the original `continuation`;
 *   `deduplicatedContinuation` is `continuation` with the overlap removed.
 */
function detectOverlap(checkpoint, continuation, options = {}) {
  if (!checkpoint || !continuation || checkpoint.length === 0 || continuation.length === 0) {
    return noOverlapResult(continuation || "");
  }
  const {
    minOverlap = 2,
    maxOverlap = Math.min(500, continuation.length),
    caseSensitive = true,
    normalizeWhitespace = false
  } = options;
  let checkpointForMatch = checkpoint;
  let continuationForMatch = continuation;
  if (!caseSensitive) {
    // NOTE(review): toLowerCase() can change the length of a few characters
    // (e.g. U+0130), which would skew the offsets reported below. Not handled.
    checkpointForMatch = checkpoint.toLowerCase();
    continuationForMatch = continuation.toLowerCase();
  }
  if (normalizeWhitespace) {
    checkpointForMatch = checkpointForMatch.replace(/\s+/g, " ");
    continuationForMatch = continuationForMatch.replace(/\s+/g, " ");
  }
  // Candidate lengths must stay positive: for len <= 0, slice(-len) and
  // slice(0, len) stop meaning "suffix" and "prefix" and can compare equal
  // for unrelated texts, yielding a match with a negative overlapLength.
  const shortestAllowed = Math.max(1, minOverlap);
  // Floor so a fractional maxOverlap cannot surface as a fractional length.
  const longestAllowed = Math.floor(
    Math.min(checkpointForMatch.length, continuationForMatch.length, maxOverlap)
  );
  if (longestAllowed < shortestAllowed) {
    return noOverlapResult(continuation);
  }
  // Try the longest candidate first so the first hit is the maximal overlap.
  for (let len = longestAllowed; len >= shortestAllowed; len--) {
    const prefix = continuationForMatch.slice(0, len);
    if (checkpointForMatch.slice(-len) !== prefix) {
      continue;
    }
    // With whitespace normalization the match length is measured in
    // normalized text, so map it back before slicing the original.
    const overlapLength = normalizeWhitespace
      ? originalLengthOfNormalizedPrefix(continuation, prefix)
      : len;
    return {
      overlapLength,
      overlapText: continuation.slice(0, overlapLength),
      deduplicatedContinuation: continuation.slice(overlapLength),
      hasOverlap: true
    };
  }
  return noOverlapResult(continuation);
}
/**
 * Return `continuation` with any text that overlaps the end of `checkpoint`
 * removed, as determined by `detectOverlap`.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that follows.
 * @param {object} [options] - Passed through to `detectOverlap` unchanged.
 * @returns {string} The `deduplicatedContinuation` field of the
 *   `detectOverlap` result.
 */
function deduplicateContinuation(checkpoint, continuation, options = {}) {
  const { deduplicatedContinuation } = detectOverlap(checkpoint, continuation, options);
  return deduplicatedContinuation;
}
// Public API of this module: every function defined above is exported by name.
export {
calculateTokenDensity,
chunkByTokens,
countMeaningfulTokens,
deduplicateContinuation,
detectOverlap,
detectRepeatedTokens,
endsAbruptly,
estimateTokenCount,
extractMeaningfulTokens,
getFirstMeaningfulToken,
getLastMeaningfulToken,
hasMeaningfulContent,
isAlphanumeric,
isMeaningfulToken,
isPunctuationOnly,
normalizeToken,
startsWithMeaningfulToken,
tokensEqual
};
//# sourceMappingURL=tokens.js.map