@ai2070/l0
Version:
L0: The Missing Reliability Substrate for AI
213 lines • 7.21 kB
JavaScript
/**
 * Tell whether a token carries any non-whitespace content.
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `false` for a falsy or zero-length token and for a token
 *   made up solely of whitespace; `true` otherwise.
 */
export function isMeaningfulToken(token) {
  const isMissing = !token || token.length === 0;
  if (isMissing) {
    return false;
  }
  // Both checks reject whitespace-only tokens: one via trim(), one via regex.
  const isBlank = token.trim().length === 0 || /^[\r\n\t\s]+$/.test(token);
  return !isBlank;
}
/**
 * Tell whether a piece of content contains anything besides whitespace.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when `content` is non-empty and has at least one
 *   non-whitespace character; `false` otherwise (including falsy input).
 */
export function hasMeaningfulContent(content) {
  if (content && content.length !== 0) {
    const visibleLength = content.trim().length;
    return visibleLength !== 0 && !/^[\r\n\t\s]+$/.test(content);
  }
  return false;
}
/**
 * Count the whitespace-delimited tokens in a piece of content.
 *
 * @param {string} content - Text to count tokens in.
 * @returns {number} Number of non-empty pieces obtained by splitting the
 *   trimmed content on runs of whitespace; 0 when the content is falsy or
 *   `hasMeaningfulContent` rejects it.
 */
export function countMeaningfulTokens(content) {
  const isUsable = Boolean(content) && hasMeaningfulContent(content);
  if (!isUsable) {
    return 0;
  }
  let total = 0;
  for (const piece of content.trim().split(/\s+/)) {
    if (piece.length > 0) {
      total += 1;
    }
  }
  return total;
}
/**
 * Split content into its whitespace-delimited tokens.
 *
 * @param {string} content - Text to tokenize.
 * @returns {string[]} The non-empty pieces of the trimmed content, split on
 *   runs of whitespace; an empty array when the content is falsy or
 *   `hasMeaningfulContent` rejects it.
 */
export function extractMeaningfulTokens(content) {
  if (content && hasMeaningfulContent(content)) {
    const pieces = content.trim().split(/\s+/);
    return pieces.filter((piece) => piece.length > 0);
  }
  return [];
}
/**
 * Tell whether a token consists solely of punctuation/symbol characters.
 *
 * A character disqualifies the token when it is a letter or a number in any
 * script, an underscore, or whitespace. Matching is Unicode-aware: the
 * ASCII-only `\w` class would treat tokens such as "é" or "日本語" as
 * "not a word character" and therefore misreport them as punctuation.
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `true` when every character is punctuation or a symbol;
 *   `false` otherwise, including for a falsy or empty token.
 */
export function isPunctuationOnly(token) {
  if (!token || token.length === 0) {
    return false;
  }
  // `_` stays excluded explicitly because `\w` (which it replaces) covered it.
  return /^[^\p{L}\p{N}_\s]+$/u.test(token);
}
/**
 * Tell whether a token contains at least one ASCII letter or digit.
 *
 * Note that this is a "contains" check, not an "entirely alphanumeric" check:
 * "a!" yields `true`. Only `a-z`, `A-Z` and `0-9` are recognised.
 *
 * @param {string} token - Token to inspect.
 * @returns {boolean} `true` when any ASCII alphanumeric character is present;
 *   `false` otherwise, including for a falsy or empty token.
 */
export function isAlphanumeric(token) {
  const isMissing = !token || token.length === 0;
  return isMissing ? false : /[a-zA-Z0-9]/.test(token);
}
/**
 * Produce the canonical form of a token used for comparisons.
 *
 * @param {string} token - Token to normalize; must provide `trim()`, so
 *   `null`/`undefined` throw a TypeError.
 * @returns {string} The token with surrounding whitespace removed and
 *   converted to lower case.
 */
export function normalizeToken(token) {
  const stripped = token.trim();
  return stripped.toLowerCase();
}
/**
 * Compare two tokens after passing each through `normalizeToken`.
 *
 * @param {string} token1 - First token.
 * @param {string} token2 - Second token.
 * @returns {boolean} `true` when both normalized forms are strictly equal.
 */
export function tokensEqual(token1, token2) {
  const left = normalizeToken(token1);
  const right = normalizeToken(token2);
  return left === right;
}
/**
 * List the tokens that occur at least `threshold` times in the content.
 *
 * Occurrences are tallied on the normalized form of each token. A token is
 * reported once, at the moment its tally reaches `threshold` exactly, using
 * the spelling of that particular occurrence.
 *
 * @param {string} content - Text to scan.
 * @param {number} [threshold=3] - Occurrence count that triggers a report.
 * @returns {string[]} Reported tokens in the order they hit the threshold;
 *   empty when the content is falsy or has no meaningful content.
 */
export function detectRepeatedTokens(content, threshold = 3) {
  const flagged = [];
  if (!content || !hasMeaningfulContent(content)) {
    return flagged;
  }
  const seen = new Map();
  extractMeaningfulTokens(content).forEach((word) => {
    const key = normalizeToken(word);
    const occurrences = (seen.get(key) ?? 0) + 1;
    seen.set(key, occurrences);
    // Strict equality ensures each token is reported a single time.
    if (occurrences === threshold) {
      flagged.push(word);
    }
  });
  return flagged;
}
/**
 * Compute how many tokens the content holds per unit of its length.
 *
 * @param {string} content - Text to measure.
 * @returns {number} `countMeaningfulTokens(content)` divided by
 *   `content.length`; 0 for falsy or empty content.
 */
export function calculateTokenDensity(content) {
  const isEmpty = !content || content.length === 0;
  return isEmpty ? 0 : countMeaningfulTokens(content) / content.length;
}
/**
 * Produce a rough token-count estimate for the content.
 *
 * The estimate is the rounded-up mean of two figures: the whitespace-delimited
 * word count and the content length divided by four (rounded up).
 *
 * @param {string} content - Text to estimate.
 * @returns {number} Estimated token count; 0 for falsy or empty content.
 */
export function estimateTokenCount(content) {
  if (content && content.length !== 0) {
    const byWords = countMeaningfulTokens(content);
    const byChars = Math.ceil(content.length / 4);
    return Math.ceil((byWords + byChars) / 2);
  }
  return 0;
}
/**
 * Tell whether the content, ignoring leading whitespace, begins with a
 * meaningful character.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} Result of `isMeaningfulToken` on the first character left
 *   after `trimStart()`; `false` when the content is falsy, empty, or has no
 *   such character.
 */
export function startsWithMeaningfulToken(content) {
  if (!content || content.length === 0) {
    return false;
  }
  // charAt(0) yields "" when nothing is left, which is falsy just like a
  // missing index.
  const leading = content.trimStart().charAt(0);
  return leading ? isMeaningfulToken(leading) : false;
}
/**
 * Return the first whitespace-delimited token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string|null} The first entry of `extractMeaningfulTokens(content)`,
 *   or `null` when there are no tokens.
 */
export function getFirstMeaningfulToken(content) {
  const [first] = extractMeaningfulTokens(content);
  return first ?? null;
}
/**
 * Return the last whitespace-delimited token of the content.
 *
 * @param {string} content - Text to inspect.
 * @returns {string|null} The final entry of `extractMeaningfulTokens(content)`,
 *   or `null` when there are no tokens.
 */
export function getLastMeaningfulToken(content) {
  const words = extractMeaningfulTokens(content);
  if (words.length === 0) {
    return null;
  }
  return words[words.length - 1] ?? null;
}
/**
 * Tell whether the content stops without a terminating character.
 *
 * The trimmed content counts as properly terminated when its last character
 * is one of `. ! ? ; :` or a closing `)`, `]`, `}`.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} `true` when the trimmed content has no such terminator;
 *   `false` when it has one, or when the content is falsy or has no
 *   meaningful content.
 */
export function endsAbruptly(content) {
  if (!content || !hasMeaningfulContent(content)) {
    return false;
  }
  const text = content.trim();
  const isTerminated = /[.!?;:]$/.test(text) || /[)\]}]$/.test(text);
  return !isTerminated;
}
/**
 * Split content into chunks of at most `chunkSize` tokens each.
 *
 * Tokens come from `extractMeaningfulTokens`; the tokens of each chunk are
 * re-joined with a single space, so original whitespace is not preserved.
 *
 * @param {string} content - Text to split.
 * @param {number} chunkSize - Tokens per chunk; must be a number >= 1.
 * @returns {string[]} The chunks in order; empty when the content is falsy or
 *   has no meaningful content.
 * @throws {RangeError} When there is content to chunk and `chunkSize` is not a
 *   number >= 1.
 */
export function chunkByTokens(content, chunkSize) {
  if (!content || !hasMeaningfulContent(content)) {
    return [];
  }
  // A zero or negative step never moves the index past the end, so the loop
  // below would run forever; NaN/undefined would silently yield [""].
  if (typeof chunkSize !== "number" || !(chunkSize >= 1)) {
    throw new RangeError(
      `chunkSize must be a number >= 1, received ${String(chunkSize)}`,
    );
  }
  const tokens = extractMeaningfulTokens(content);
  const chunks = [];
  for (let start = 0; start < tokens.length; start += chunkSize) {
    chunks.push(tokens.slice(start, start + chunkSize).join(" "));
  }
  return chunks;
}
/**
 * Find text that a continuation repeats from the end of a checkpoint.
 *
 * Searches for the longest suffix of `checkpoint` that equals a prefix of
 * `continuation`, trying candidate lengths from longest to shortest, and
 * reports the continuation with that prefix removed.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that follows and may repeat the tail of
 *   `checkpoint`.
 * @param {object} [options] - Matching options; `null` is treated like `{}`.
 * @param {number} [options.minOverlap=2] - Shortest overlap to accept. Values
 *   below 1 behave like 1.
 * @param {number} [options.maxOverlap] - Longest overlap to try; defaults to
 *   `Math.min(500, continuation.length)`. Fractions are rounded down.
 * @param {boolean} [options.caseSensitive=true] - When `false`, both texts are
 *   lower-cased before matching.
 * @param {boolean} [options.normalizeWhitespace=false] - When `true`, every
 *   run of whitespace is matched as a single space.
 * @returns {{overlapLength: number, overlapText: string,
 *   deduplicatedContinuation: string, hasOverlap: boolean}} `overlapLength`
 *   and `overlapText` refer to the original (un-normalized) continuation.
 */
export function detectOverlap(checkpoint, continuation, options = {}) {
  // Single source for the "nothing to strip" result. `|| ""` keeps the field
  // a string when the continuation itself is null/undefined.
  const noOverlap = () => ({
    overlapLength: 0,
    overlapText: "",
    deduplicatedContinuation: continuation || "",
    hasOverlap: false,
  });
  if (
    !checkpoint ||
    !continuation ||
    checkpoint.length === 0 ||
    continuation.length === 0
  ) {
    return noOverlap();
  }
  const {
    minOverlap = 2,
    maxOverlap = Math.min(500, continuation.length),
    caseSensitive = true,
    normalizeWhitespace = false,
  } = options ?? {};

  let checkpointForMatch = checkpoint;
  let continuationForMatch = continuation;
  if (!caseSensitive) {
    // NOTE(review): toLowerCase() changes the length of a few characters
    // (e.g. "\u0130"); lengths found on the lower-cased text are applied to
    // the original continuation below - confirm such input is not expected.
    checkpointForMatch = checkpoint.toLowerCase();
    continuationForMatch = continuation.toLowerCase();
  }
  if (normalizeWhitespace) {
    checkpointForMatch = checkpointForMatch.replace(/\s+/g, " ");
    continuationForMatch = continuationForMatch.replace(/\s+/g, " ");
  }

  // Candidate lengths must be whole and positive: at a negative length,
  // slice(-len) and slice(0, len) no longer denote a suffix and a prefix, so
  // a match there would be spurious and yield a negative overlapLength.
  const shortest = Math.max(1, minOverlap);
  const longest = Math.floor(
    Math.min(checkpointForMatch.length, continuationForMatch.length, maxOverlap),
  );

  const isWhitespace = (ch) => /\s/.test(ch);
  // Translate a prefix of the whitespace-normalized continuation into the
  // number of characters it spans in the original continuation.
  const originalLengthOf = (normalizedPrefix) => {
    let normalizedPos = 0;
    let originalPos = 0;
    while (
      normalizedPos < normalizedPrefix.length &&
      originalPos < continuation.length
    ) {
      if (!isWhitespace(continuation[originalPos])) {
        normalizedPos++;
        originalPos++;
      } else if (normalizedPrefix[normalizedPos] === " ") {
        // One normalized space stands for the whole original whitespace run.
        normalizedPos++;
        originalPos++;
        while (
          originalPos < continuation.length &&
          isWhitespace(continuation[originalPos])
        ) {
          originalPos++;
        }
      } else {
        originalPos++;
      }
    }
    return originalPos;
  };

  // Longest first, so the first hit is the maximal overlap. When `longest` is
  // below `shortest` (or NaN) the loop body never runs.
  for (let len = longest; len >= shortest; len--) {
    const suffix = checkpointForMatch.slice(-len);
    const prefix = continuationForMatch.slice(0, len);
    if (suffix !== prefix) {
      continue;
    }
    const actualOverlapLength = normalizeWhitespace
      ? originalLengthOf(prefix)
      : len;
    return {
      overlapLength: actualOverlapLength,
      overlapText: continuation.slice(0, actualOverlapLength),
      deduplicatedContinuation: continuation.slice(actualOverlapLength),
      hasOverlap: true,
    };
  }
  return noOverlap();
}
/**
 * Return the continuation with any text it repeats from the end of the
 * checkpoint removed.
 *
 * @param {string} checkpoint - Text already received.
 * @param {string} continuation - Text that follows the checkpoint.
 * @param {object} [options] - Forwarded unchanged to `detectOverlap`.
 * @returns {string} The `deduplicatedContinuation` field of the
 *   `detectOverlap` result.
 */
export function deduplicateContinuation(checkpoint, continuation, options = {}) {
  const { deduplicatedContinuation } = detectOverlap(
    checkpoint,
    continuation,
    options,
  );
  return deduplicatedContinuation;
}
//# sourceMappingURL=tokens.js.map