bpe-merge-visualizer
Version:
CLI tool to visualize Byte Pair Encoding (BPE) merge steps
44 lines (36 loc) • 971 B
JavaScript
/**
 * Builds a mock list of merge rules for a word.
 *
 * Rule k pairs the first k + 1 characters of the word (joined into one
 * string) with the character that follows them, so "abc" yields
 * [["a", "b"], ["ab", "c"]]. Words shorter than two characters yield no rules.
 *
 * @param {string} word - Word to derive the rules from; split with `split('')`.
 * @returns {string[][]} Ordered `[left, right]` pairs.
 */
function generateMockRules(word) {
  const chars = word.split('');
  // Every character except the first is the right-hand side of exactly one
  // rule; its left-hand side is everything that precedes it.
  return chars
    .slice(1)
    .map((next, idx) => [chars.slice(0, idx + 1).join(''), next]);
}
/**
 * Applies the mock merge rules for `input` one at a time and records the
 * token list after every rule.
 *
 * A rule `[left, right]` merges two adjacent tokens only when the first token
 * equals `left` and the second equals `right`. Each pass scans left to right
 * and a token consumed by a merge is not reused within the same pass.
 *
 * @param {string} input - Word to tokenize; initially split with `split("")`.
 * @returns {string[][]} Snapshots of the token list: index 0 is the initial
 *   split, followed by one snapshot per rule (identical to the previous
 *   snapshot when the rule matched nothing).
 */
function applyBPERules(input) {
  let tokens = input.split("");
  const steps = [[...tokens]];

  for (const [left, right] of generateMockRules(input)) {
    const merged = [];
    let i = 0;
    while (i < tokens.length) {
      // Compare the pair component-wise. Comparing the concatenation instead
      // would let a rule such as ("aaa", "a") merge the unrelated pair
      // ("aa", "aa") just because both spell "aaaa".
      const isMatch =
        i + 1 < tokens.length && tokens[i] === left && tokens[i + 1] === right;
      if (isMatch) {
        merged.push(left + right);
        i += 2;
      } else {
        merged.push(tokens[i]);
        i += 1;
      }
    }
    tokens = merged;
    // Store a copy so every recorded step is an independent array.
    steps.push([...tokens]);
  }

  return steps;
}
/**
 * Logs every merge step for a word to the console.
 *
 * Each step is printed as `Step N:` followed by the step's tokens, each
 * wrapped in square brackets and separated by single spaces.
 *
 * @param {string} word - Word passed to `applyBPERules`.
 * @returns {void}
 */
export function visualizeBPE(word) {
  let stepNumber = 0;
  for (const stepTokens of applyBPERules(word)) {
    const rendered = stepTokens.map((token) => `[${token}]`).join(" ");
    console.log(`Step ${stepNumber}:`, rendered);
    stepNumber += 1;
  }
}