UNPKG

bpe-merge-visualizer

Version:

CLI tool to visualize Byte Pair Encoding (BPE) merge steps

44 lines (36 loc) 971 B
/**
 * Builds a mock, ordered list of BPE merge rules for a single word.
 *
 * Rule `i` pairs the prefix made of the first `i + 1` characters with the
 * character that follows it, e.g. "abc" -> [["a", "b"], ["ab", "c"]].
 *
 * @param {string} word - Word to derive the rules from.
 * @returns {Array<[string, string]>} `[left, right]` pairs in application
 *   order; empty when the word has fewer than two characters.
 */
function generateMockRules(word) {
  // Iterate by code point so surrogate pairs (emoji, etc.) stay intact;
  // split('') would cut them into two lone surrogates.
  const letters = [...word];
  const rules = [];
  let prefix = '';
  for (let i = 0; i < letters.length - 1; i++) {
    prefix += letters[i];
    rules.push([prefix, letters[i + 1]]);
  }
  return rules;
}

/**
 * Applies the mock merge rules for `input` one at a time and records the
 * token list after every rule.
 *
 * @param {string} input - Word to tokenize.
 * @returns {string[][]} Snapshots of the token list: index 0 is the initial
 *   per-character split, index `k` is the state after rule `k - 1`.
 */
function applyBPERules(input) {
  // Same code-point split as generateMockRules so rules and tokens line up.
  let tokens = [...input];
  const steps = [[...tokens]];

  for (const [left, right] of generateMockRules(input)) {
    const merged = [];
    let i = 0;
    while (i < tokens.length) {
      // Match the pair itself, not its concatenation: comparing
      // tokens[i] + tokens[i + 1] with left + right would also merge
      // unrelated pairs such as ("ab", "ab") under the rule ("aba", "b").
      const isPair =
        i + 1 < tokens.length && tokens[i] === left && tokens[i + 1] === right;
      if (isPair) {
        merged.push(`${left}${right}`);
        i += 2;
      } else {
        merged.push(tokens[i]);
        i += 1;
      }
    }
    tokens = merged;
    steps.push([...tokens]);
  }

  return steps;
}

/**
 * Prints every merge step for `word` to the console, one line per step,
 * with each token wrapped in square brackets.
 *
 * @param {string} word - Word whose merge steps are printed.
 * @returns {void}
 */
export function visualizeBPE(word) {
  const steps = applyBPERules(word);
  steps.forEach((step, index) => {
    console.log(`Step ${index}:`, step.map((t) => `[${t}]`).join(" "));
  });
}