UNPKG

@ai2070/l0

Version:

L0: The Missing Reliability Substrate for AI

213 lines 7.21 kB
// Matches a token made up solely of characters that are neither letters,
// digits, underscores, nor whitespace. Unicode property escapes are used so
// that non-ASCII letters and digits (e.g. "é", "你好") are not mistaken for
// punctuation, which is what the ASCII-only `\w` class would do.
const PUNCTUATION_ONLY_PATTERN = /^[^\p{L}\p{N}_\s]+$/u;

// Matches any single ASCII letter or digit anywhere in the input.
const ASCII_ALPHANUMERIC_PATTERN = /[a-zA-Z0-9]/;

/**
 * Report whether a token contains at least one non-whitespace character.
 *
 * @param {string} token - Candidate token.
 * @returns {boolean} `false` for falsy, empty, or whitespace-only input.
 */
export function isMeaningfulToken(token) {
  if (!token || token.length === 0) {
    return false;
  }
  // trim() strips exactly the character set matched by `\s`, so a non-empty
  // trimmed string already proves the token is not whitespace-only.
  return token.trim().length > 0;
}

/**
 * Report whether a piece of content contains at least one non-whitespace
 * character. Same rule as {@link isMeaningfulToken}, applied to whole content.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `false` for falsy, empty, or whitespace-only input.
 */
export function hasMeaningfulContent(content) {
  return isMeaningfulToken(content);
}

/**
 * Count the whitespace-delimited tokens in the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {number} Number of tokens; 0 when there is no meaningful content.
 */
export function countMeaningfulTokens(content) {
  return extractMeaningfulTokens(content).length;
}

/**
 * Split the content into whitespace-delimited tokens.
 *
 * @param {string} content - Text to split.
 * @returns {string[]} Tokens in order of appearance; empty when there is no
 *   meaningful content.
 */
export function extractMeaningfulTokens(content) {
  if (!hasMeaningfulContent(content)) {
    return [];
  }
  // After trim() there is no leading/trailing whitespace, so splitting on
  // whitespace runs can never produce an empty token.
  return content.trim().split(/\s+/);
}

/**
 * Report whether a token consists only of punctuation/symbol characters,
 * i.e. contains no letter, digit, underscore, or whitespace in any script.
 *
 * @param {string} token - Candidate token.
 * @returns {boolean} `false` for falsy or empty input.
 */
export function isPunctuationOnly(token) {
  if (!token || token.length === 0) {
    return false;
  }
  return PUNCTUATION_ONLY_PATTERN.test(token);
}

/**
 * Report whether a token contains at least one ASCII letter or digit.
 *
 * Note that this is a "contains" check, not an "every character" check:
 * `"--a--"` yields `true`.
 *
 * @param {string} token - Candidate token.
 * @returns {boolean} `false` for falsy or empty input.
 */
export function isAlphanumeric(token) {
  if (!token || token.length === 0) {
    return false;
  }
  return ASCII_ALPHANUMERIC_PATTERN.test(token);
}

/**
 * Normalize a token for comparison by trimming it and lower-casing it.
 *
 * @param {string | null | undefined} token - Token to normalize; `null` and
 *   `undefined` are treated as the empty string rather than throwing.
 * @returns {string} The normalized token.
 */
export function normalizeToken(token) {
  return (token ?? "").trim().toLowerCase();
}

/**
 * Compare two tokens ignoring case and surrounding whitespace.
 *
 * @param {string | null | undefined} token1 - First token.
 * @param {string | null | undefined} token2 - Second token.
 * @returns {boolean} `true` when both normalize to the same string.
 */
export function tokensEqual(token1, token2) {
  return normalizeToken(token1) === normalizeToken(token2);
}

/**
 * Find tokens that occur at least `threshold` times (compared after
 * normalization).
 *
 * Each repeated token is reported once, in the order in which it reaches the
 * threshold, using the spelling of the occurrence that reached it.
 *
 * @param {string} content - Text to scan.
 * @param {number} [threshold=3] - Occurrence count at which a token is
 *   reported.
 * @returns {string[]} Repeated tokens; empty when there is no meaningful
 *   content.
 */
export function detectRepeatedTokens(content, threshold = 3) {
  const repeated = [];
  const occurrences = new Map();
  for (const token of extractMeaningfulTokens(content)) {
    const key = normalizeToken(token);
    const seen = (occurrences.get(key) ?? 0) + 1;
    occurrences.set(key, seen);
    // Strict equality (not >=) so each token is reported only once.
    if (seen === threshold) {
      repeated.push(token);
    }
  }
  return repeated;
}

/**
 * Compute the ratio of whitespace-delimited tokens to `content.length`.
 *
 * @param {string} content - Text to measure.
 * @returns {number} Tokens per character; 0 for falsy or empty input.
 */
export function calculateTokenDensity(content) {
  if (!content || content.length === 0) {
    return 0;
  }
  return countMeaningfulTokens(content) / content.length;
}
/**
 * Estimate a token count for the content by averaging two heuristics: the
 * whitespace-delimited word count and one quarter of `content.length`.
 *
 * @param {string} content - Text to estimate.
 * @returns {number} Rounded-up estimate; 0 for falsy or empty input.
 */
export function estimateTokenCount(content) {
  if (!content || content.length === 0) {
    return 0;
  }
  const wordCount = countMeaningfulTokens(content);
  const charEstimate = Math.ceil(content.length / 4);
  return Math.ceil((wordCount + charEstimate) / 2);
}

/**
 * Report whether the content, after leading whitespace is removed, begins
 * with a meaningful character.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `false` for falsy, empty, or whitespace-only input.
 */
export function startsWithMeaningfulToken(content) {
  if (!content || content.length === 0) {
    return false;
  }
  const firstChar = content.trimStart()[0];
  if (!firstChar) {
    return false;
  }
  return isMeaningfulToken(firstChar);
}

/**
 * Return the first whitespace-delimited token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string | null} The first token, or `null` when there is none.
 */
export function getFirstMeaningfulToken(content) {
  const tokens = extractMeaningfulTokens(content);
  return tokens[0] ?? null;
}

/**
 * Return the last whitespace-delimited token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string | null} The last token, or `null` when there is none.
 */
export function getLastMeaningfulToken(content) {
  const tokens = extractMeaningfulTokens(content);
  return tokens[tokens.length - 1] ?? null;
}

/**
 * Report whether the trimmed content ends without terminal punctuation
 * (`. ! ? ; :`) and without a closing bracket (`) ] }`).
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when the last character is none of the above;
 *   `false` when there is no meaningful content.
 */
export function endsAbruptly(content) {
  if (!content || !hasMeaningfulContent(content)) {
    return false;
  }
  return !/[.!?;:)\]}]$/.test(content.trim());
}

/**
 * Split the content into chunks of at most `chunkSize` whitespace-delimited
 * tokens; tokens within a chunk are re-joined with a single space.
 *
 * @param {string} content - Text to chunk.
 * @param {number} chunkSize - Maximum tokens per chunk. Fractional values are
 *   rounded down; `Infinity` yields a single chunk.
 * @returns {string[]} Chunks in order; empty when there is no meaningful
 *   content.
 * @throws {RangeError} When there is content to chunk and `chunkSize` is not
 *   a number >= 1. Previously 0 or a negative size made the loop spin forever
 *   and `NaN`/`undefined` produced a single empty chunk.
 */
export function chunkByTokens(content, chunkSize) {
  // The content guard stays first so empty input keeps returning [] no matter
  // what chunkSize is.
  if (!content || !hasMeaningfulContent(content)) {
    return [];
  }
  const size = Math.floor(chunkSize);
  // Written as a negated comparison so NaN is rejected as well.
  if (!(size >= 1)) {
    throw new RangeError(
      `chunkSize must be a number >= 1, received ${String(chunkSize)}`,
    );
  }
  const tokens = extractMeaningfulTokens(content);
  const chunks = [];
  for (let start = 0; start < tokens.length; start += size) {
    chunks.push(tokens.slice(start, start + size).join(" "));
  }
  return chunks;
}

// Build the "nothing overlapped" result, passing the continuation through.
function buildNoOverlapResult(continuation) {
  return {
    overlapLength: 0,
    overlapText: "",
    deduplicatedContinuation: continuation,
    hasOverlap: false,
  };
}

// Translate a prefix of the whitespace-normalized continuation back into the
// number of characters it spans in the original continuation. A single
// normalized space stands for a whole run of original whitespace.
function mapNormalizedPrefixToOriginalLength(continuation, normalizedPrefix) {
  let normalizedPos = 0;
  let originalPos = 0;
  while (
    normalizedPos < normalizedPrefix.length &&
    originalPos < continuation.length
  ) {
    if (!/\s/.test(continuation[originalPos])) {
      normalizedPos++;
      originalPos++;
      continue;
    }
    if (normalizedPrefix[normalizedPos] === " ") {
      normalizedPos++;
      originalPos++;
      // Swallow the remainder of the whitespace run that collapsed into the
      // single normalized space just consumed.
      while (
        originalPos < continuation.length &&
        /\s/.test(continuation[originalPos])
      ) {
        originalPos++;
      }
    } else {
      originalPos++;
    }
  }
  return originalPos;
}

/**
 * Find the longest suffix of `checkpoint` that is also a prefix of
 * `continuation`, so the duplicated text can be dropped from the
 * continuation.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that may repeat the end of `checkpoint`.
 * @param {object} [options] - Matching options.
 * @param {number} [options.minOverlap=2] - Shortest overlap to accept. Values
 *   below 1 are treated as 1: a zero-length overlap is meaningless and
 *   negative lengths would invert the meaning of the slices used to compare.
 * @param {number} [options.maxOverlap] - Longest overlap to look for;
 *   defaults to `Math.min(500, continuation.length)`. Fractional values are
 *   rounded down.
 * @param {boolean} [options.caseSensitive=true] - Compare case-sensitively.
 * @param {boolean} [options.normalizeWhitespace=false] - Collapse whitespace
 *   runs to a single space on both sides before comparing.
 * @returns {{overlapLength: number, overlapText: string,
 *   deduplicatedContinuation: string, hasOverlap: boolean}} `overlapLength`
 *   and `overlapText` are expressed in terms of the original `continuation`.
 */
export function detectOverlap(checkpoint, continuation, options = {}) {
  if (
    !checkpoint ||
    !continuation ||
    checkpoint.length === 0 ||
    continuation.length === 0
  ) {
    return buildNoOverlapResult(continuation || "");
  }

  const {
    minOverlap = 2,
    maxOverlap = Math.min(500, continuation.length),
    caseSensitive = true,
    normalizeWhitespace = false,
  } = options ?? {};

  let checkpointForMatch = checkpoint;
  let continuationForMatch = continuation;
  if (!caseSensitive) {
    checkpointForMatch = checkpointForMatch.toLowerCase();
    continuationForMatch = continuationForMatch.toLowerCase();
  }
  if (normalizeWhitespace) {
    checkpointForMatch = checkpointForMatch.replace(/\s+/g, " ");
    continuationForMatch = continuationForMatch.replace(/\s+/g, " ");
  }

  // Never scan below length 1: slice(-0) returns the whole string and
  // negative lengths turn suffix/prefix slices into their complements.
  const shortest = Math.max(1, minOverlap);
  // Floor so a fractional maxOverlap cannot leak into overlapLength.
  const longest = Math.floor(
    Math.min(checkpointForMatch.length, continuationForMatch.length, maxOverlap),
  );
  if (longest < shortest) {
    return buildNoOverlapResult(continuation);
  }

  // Longest candidate first, so the first hit is the maximal overlap.
  for (let len = longest; len >= shortest; len--) {
    const prefix = continuationForMatch.slice(0, len);
    if (checkpointForMatch.slice(-len) !== prefix) {
      continue;
    }
    const overlapLength = normalizeWhitespace
      ? mapNormalizedPrefixToOriginalLength(continuation, prefix)
      : len;
    return {
      overlapLength,
      overlapText: continuation.slice(0, overlapLength),
      deduplicatedContinuation: continuation.slice(overlapLength),
      hasOverlap: true,
    };
  }

  return buildNoOverlapResult(continuation);
}

/**
 * Return `continuation` with any leading text that repeats the end of
 * `checkpoint` removed. Accepts the same options as {@link detectOverlap}.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that may repeat the end of `checkpoint`.
 * @param {object} [options] - Options forwarded to `detectOverlap`.
 * @returns {string} The continuation without the overlapping prefix.
 */
export function deduplicateContinuation(checkpoint, continuation, options = {}) {
  return detectOverlap(checkpoint, continuation, options)
    .deduplicatedContinuation;
}
//# sourceMappingURL=tokens.js.map