UNPKG

@ai2070/l0

Version:

L0: The Missing Reliability Substrate for AI

235 lines (234 loc) 6.72 kB
/**
 * Token-level text utilities: whitespace-delimited tokenisation, rough token
 * estimates, truncation heuristics, and checkpoint/continuation overlap
 * detection.
 */

/** Matches strings made only of characters that are not letters, marks, digits, `_`, or whitespace. */
const PUNCTUATION_ONLY = /^[^\p{L}\p{M}\p{N}_\s]+$/u;

/** Matches any single ASCII letter or digit. */
const ASCII_ALPHANUMERIC = /[a-zA-Z0-9]/;

/** Matches text ending in sentence punctuation or a closing bracket. */
const CLEAN_ENDING = /[.!?;:)\]}]$/;

/** Matches one whitespace character. */
const WHITESPACE_CHAR = /\s/;

/** Matches a run of whitespace characters. */
const WHITESPACE_RUN = /\s+/;

/**
 * Shared implementation for the "is there anything besides whitespace" checks.
 *
 * @param {string | null | undefined} text
 * @returns {boolean} True when `text` is non-empty and not whitespace-only.
 */
function isNonBlank(text) {
  if (!text || text.length === 0) {
    return false;
  }
  // `trim()` strips exactly the characters `\s` matches, so no extra regex check is needed.
  return text.trim().length > 0;
}

/**
 * Builds the result object returned when no overlap is found.
 *
 * @param {string} continuation - Text to report as the deduplicated continuation.
 * @returns {{ overlapLength: number, overlapText: string, deduplicatedContinuation: string, hasOverlap: boolean }}
 */
function noOverlapResult(continuation) {
  return {
    overlapLength: 0,
    overlapText: "",
    deduplicatedContinuation: continuation,
    hasOverlap: false
  };
}

/**
 * Translates a prefix of the whitespace-normalised text back into a length in
 * the original text, counting every collapsed whitespace run in full.
 *
 * @param {string} original - Text before whitespace normalisation.
 * @param {string} normalizedPrefix - Matched prefix of the normalised text.
 * @returns {number} Number of characters of `original` covered by the prefix.
 */
function mapNormalizedPrefixToOriginalLength(original, normalizedPrefix) {
  let normalizedPos = 0;
  let originalPos = 0;
  while (normalizedPos < normalizedPrefix.length && originalPos < original.length) {
    if (!WHITESPACE_CHAR.test(original[originalPos])) {
      normalizedPos++;
      originalPos++;
    } else if (normalizedPrefix[normalizedPos] === " ") {
      // One normalised space stands for the whole original whitespace run.
      normalizedPos++;
      originalPos++;
      while (originalPos < original.length && WHITESPACE_CHAR.test(original[originalPos])) {
        originalPos++;
      }
    } else {
      // Original whitespace with no normalised counterpart: skip it.
      originalPos++;
    }
  }
  return originalPos;
}

/**
 * Checks whether a token contains at least one non-whitespace character.
 *
 * @param {string | null | undefined} token
 * @returns {boolean}
 */
function isMeaningfulToken(token) {
  return isNonBlank(token);
}

/**
 * Checks whether content contains at least one non-whitespace character.
 *
 * @param {string | null | undefined} content
 * @returns {boolean}
 */
function hasMeaningfulContent(content) {
  return isNonBlank(content);
}

/**
 * Counts the whitespace-delimited tokens in `content`.
 *
 * @param {string | null | undefined} content
 * @returns {number} Token count; 0 for empty or whitespace-only content.
 */
function countMeaningfulTokens(content) {
  return extractMeaningfulTokens(content).length;
}

/**
 * Splits `content` into whitespace-delimited tokens.
 *
 * @param {string | null | undefined} content
 * @returns {string[]} Tokens in order; empty for empty or whitespace-only content.
 */
function extractMeaningfulTokens(content) {
  if (!hasMeaningfulContent(content)) {
    return [];
  }
  // Trimming first guarantees the split yields no empty strings.
  return content.trim().split(WHITESPACE_RUN);
}

/**
 * Checks whether a token consists solely of punctuation/symbol characters,
 * i.e. contains no letter, combining mark, digit, underscore, or whitespace.
 * Letters and digits are recognised in any script, not just ASCII.
 *
 * @param {string | null | undefined} token
 * @returns {boolean} False for empty input.
 */
function isPunctuationOnly(token) {
  if (!token || token.length === 0) {
    return false;
  }
  return PUNCTUATION_ONLY.test(token);
}

/**
 * Checks whether a token contains at least one ASCII letter or digit.
 * Note that this tests "contains", not "consists only of", and that
 * non-ASCII letters and digits are not recognised.
 *
 * @param {string | null | undefined} token
 * @returns {boolean} False for empty input.
 */
function isAlphanumeric(token) {
  if (!token || token.length === 0) {
    return false;
  }
  return ASCII_ALPHANUMERIC.test(token);
}

/**
 * Normalises a token for comparison by trimming and lower-casing it.
 *
 * @param {string} token
 * @returns {string}
 */
function normalizeToken(token) {
  return token.trim().toLowerCase();
}

/**
 * Compares two tokens ignoring surrounding whitespace and letter case.
 *
 * @param {string} token1
 * @param {string} token2
 * @returns {boolean}
 */
function tokensEqual(token1, token2) {
  return normalizeToken(token1) === normalizeToken(token2);
}

/**
 * Finds tokens whose normalised form occurs at least `threshold` times.
 *
 * @param {string | null | undefined} content
 * @param {number} [threshold=3] - Occurrence count at which a token is reported.
 * @returns {string[]} One entry per repeated token: the occurrence (in its
 *   original spelling) that brought the count up to `threshold`.
 */
function detectRepeatedTokens(content, threshold = 3) {
  const repeated = [];
  const counts = new Map();
  for (const token of extractMeaningfulTokens(content)) {
    const normalized = normalizeToken(token);
    const count = (counts.get(normalized) ?? 0) + 1;
    counts.set(normalized, count);
    // Strict equality reports each token once, when it first hits the threshold.
    if (count === threshold) {
      repeated.push(token);
    }
  }
  return repeated;
}

/**
 * Computes tokens per character over the raw (untrimmed) content length.
 *
 * @param {string | null | undefined} content
 * @returns {number} 0 for empty content.
 */
function calculateTokenDensity(content) {
  if (!content || content.length === 0) {
    return 0;
  }
  return countMeaningfulTokens(content) / content.length;
}

/**
 * Estimates a token count as the rounded-up average of the word count and
 * one quarter of the character count.
 *
 * @param {string | null | undefined} content
 * @returns {number} 0 for empty content.
 */
function estimateTokenCount(content) {
  if (!content || content.length === 0) {
    return 0;
  }
  const wordCount = countMeaningfulTokens(content);
  const charEstimate = Math.ceil(content.length / 4);
  return Math.ceil((wordCount + charEstimate) / 2);
}

/**
 * Checks whether anything remains once leading whitespace is skipped.
 *
 * @param {string | null | undefined} content
 * @returns {boolean}
 */
function startsWithMeaningfulToken(content) {
  // The first character left after skipping leading whitespace is by
  // definition non-whitespace, so this reduces to a non-blank check.
  return isNonBlank(content);
}

/**
 * Returns the first whitespace-delimited token.
 *
 * @param {string | null | undefined} content
 * @returns {string | null} Null when there are no tokens.
 */
function getFirstMeaningfulToken(content) {
  const tokens = extractMeaningfulTokens(content);
  return tokens[0] ?? null;
}

/**
 * Returns the last whitespace-delimited token.
 *
 * @param {string | null | undefined} content
 * @returns {string | null} Null when there are no tokens.
 */
function getLastMeaningfulToken(content) {
  const tokens = extractMeaningfulTokens(content);
  return tokens[tokens.length - 1] ?? null;
}

/**
 * Heuristic truncation check: trimmed content is considered to end abruptly
 * unless its last character is one of `. ! ? ; :` or a closing `) ] }`.
 *
 * @param {string | null | undefined} content
 * @returns {boolean} False for empty or whitespace-only content.
 */
function endsAbruptly(content) {
  if (!hasMeaningfulContent(content)) {
    return false;
  }
  return !CLEAN_ENDING.test(content.trim());
}

/**
 * Groups the whitespace-delimited tokens of `content` into chunks of
 * `chunkSize` tokens, each joined by single spaces.
 *
 * @param {string | null | undefined} content
 * @param {number} chunkSize - Tokens per chunk; must be at least 1.
 * @returns {string[]} Chunks in order; empty for empty or whitespace-only content.
 * @throws {RangeError} If there are tokens to chunk and `chunkSize` is not >= 1.
 */
function chunkByTokens(content, chunkSize) {
  const tokens = extractMeaningfulTokens(content);
  if (tokens.length === 0) {
    return [];
  }
  // A zero or negative step would never advance the loop below, and NaN or
  // undefined would produce a single empty chunk; reject them explicitly.
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1, received ${String(chunkSize)}`);
  }
  const chunks = [];
  for (let i = 0; i < tokens.length; i += chunkSize) {
    chunks.push(tokens.slice(i, i + chunkSize).join(" "));
  }
  return chunks;
}

/**
 * Finds the longest suffix of `checkpoint` that is also a prefix of
 * `continuation`, and returns the continuation with that prefix removed.
 *
 * Lengths in the result always refer to the original `continuation`. With
 * `caseSensitive: false` the comparison runs on lower-cased copies, so for the
 * few characters whose lower-case form has a different length the reported
 * length can be off.
 *
 * @param {string | null | undefined} checkpoint - Text already emitted.
 * @param {string | null | undefined} continuation - Text that may repeat the checkpoint's tail.
 * @param {object} [options]
 * @param {number} [options.minOverlap=2] - Shortest overlap to accept; values below 1 are treated as 1.
 * @param {number} [options.maxOverlap] - Longest overlap to try; defaults to min(500, continuation.length).
 * @param {boolean} [options.caseSensitive=true] - Compare with letter case preserved.
 * @param {boolean} [options.normalizeWhitespace=false] - Treat any whitespace run as one space when comparing.
 * @returns {{ overlapLength: number, overlapText: string, deduplicatedContinuation: string, hasOverlap: boolean }}
 */
function detectOverlap(checkpoint, continuation, options = {}) {
  if (!checkpoint || !continuation || checkpoint.length === 0 || continuation.length === 0) {
    return noOverlapResult(continuation || "");
  }
  const {
    minOverlap = 2,
    maxOverlap = Math.min(500, continuation.length),
    caseSensitive = true,
    normalizeWhitespace = false
  } = options;

  let checkpointForMatch = checkpoint;
  let continuationForMatch = continuation;
  if (!caseSensitive) {
    checkpointForMatch = checkpointForMatch.toLowerCase();
    continuationForMatch = continuationForMatch.toLowerCase();
  }
  if (normalizeWhitespace) {
    checkpointForMatch = checkpointForMatch.replace(/\s+/g, " ");
    continuationForMatch = continuationForMatch.replace(/\s+/g, " ");
  }

  // A candidate length below 1 would flip the meaning of slice(-len) and
  // slice(0, len) and could report a negative overlap length.
  const smallestOverlap = Math.max(1, minOverlap);
  const maxPossibleOverlap = Math.min(
    checkpointForMatch.length,
    continuationForMatch.length,
    maxOverlap
  );
  if (maxPossibleOverlap < smallestOverlap) {
    return noOverlapResult(continuation);
  }

  // Try the longest candidate first so the first hit is the longest overlap.
  for (let len = maxPossibleOverlap; len >= smallestOverlap; len--) {
    const prefix = continuationForMatch.slice(0, len);
    if (checkpointForMatch.slice(-len) !== prefix) {
      continue;
    }
    const actualOverlapLength = normalizeWhitespace
      ? mapNormalizedPrefixToOriginalLength(continuation, prefix)
      : len;
    return {
      overlapLength: actualOverlapLength,
      overlapText: continuation.slice(0, actualOverlapLength),
      deduplicatedContinuation: continuation.slice(actualOverlapLength),
      hasOverlap: true
    };
  }
  return noOverlapResult(continuation);
}

/**
 * Removes from `continuation` any leading text that repeats the tail of
 * `checkpoint`.
 *
 * @param {string | null | undefined} checkpoint
 * @param {string | null | undefined} continuation
 * @param {object} [options] - Same options as `detectOverlap`.
 * @returns {string} The continuation without the overlapping prefix.
 */
function deduplicateContinuation(checkpoint, continuation, options = {}) {
  return detectOverlap(checkpoint, continuation, options).deduplicatedContinuation;
}

export {
  calculateTokenDensity,
  chunkByTokens,
  countMeaningfulTokens,
  deduplicateContinuation,
  detectOverlap,
  detectRepeatedTokens,
  endsAbruptly,
  estimateTokenCount,
  extractMeaningfulTokens,
  getFirstMeaningfulToken,
  getLastMeaningfulToken,
  hasMeaningfulContent,
  isAlphanumeric,
  isMeaningfulToken,
  isPunctuationOnly,
  normalizeToken,
  startsWithMeaningfulToken,
  tokensEqual
};
//# sourceMappingURL=tokens.js.map