cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.
298 lines (296 loc) • 8.91 kB
JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
/**
 * Computes a line-by-line diff between two texts and renders it either as
 * plain ASCII or as ANSI-coloured CLI output.
 *
 * The diff is computed once, in the constructor. Lines are paired by index
 * (line i of `a` against line i of `b`); there is no line re-alignment.
 *
 * Recognised options (defaults in parentheses):
 *  - mode ('word'): 'word' diffs whitespace-separated tokens, 'line' diffs whole lines
 *  - caseInsensitive (false): compare lower-cased text
 *  - contextLines (1): lines shown before/after a changed line
 *  - groupedLines (true): render merged groups instead of single entries
 *  - expandLines (false): render every line of the input
 *  - showChangeMagnitude (true): append the +/- magnitude string to headers
 *  - maxMagnitudeSymbols (5): maximum length of the magnitude string
 *  - lineBreak ('\n'): separator used to join the rendered output
 */
class DiffChecker {
  a;
  b;
  options;
  entries = [];
  grouped = [];
  diffRun = false;

  /**
   * @param {string} a - original text
   * @param {string} b - modified text
   * @param {object} [opt={}] - option overrides, merged over the defaults
   */
  constructor(a, b, opt = {}) {
    this.a = a;
    this.b = b;
    this.options = {
      mode: 'word',
      caseInsensitive: false,
      contextLines: 1,
      groupedLines: true,
      expandLines: false,
      showChangeMagnitude: true,
      maxMagnitudeSymbols: 5,
      lineBreak: '\n',
      ...opt
    };
    this.computeDiff();
  }

  /**
   * Splits both (trimmed) texts into lines.
   *
   * @returns {{linesA: string[], linesB: string[], maxLen: number}}
   *   the two line arrays and the larger of their lengths
   */
  text2lines() {
    const linesA = this.a.trim().split(/\r?\n/);
    const linesB = this.b.trim().split(/\r?\n/);
    return { linesA, linesB, maxLen: Math.max(linesA.length, linesB.length) };
  }

  /**
   * Splits a line into tokens according to `options.mode`.
   *
   * @param {string} input - a single line
   * @returns {string[]|undefined} tokens; undefined for any mode other than
   *   'line' or 'word'
   */
  tokenize(input) {
    switch (this.options.mode) {
      case 'line':
        return [input];
      case 'word':
        return input.split(/\s+/);
    }
  }

  /**
   * Joins tokens back into a string (single space in 'word' mode, nothing otherwise).
   *
   * @param {string[]} input - tokens
   * @returns {string}
   */
  concat(input) {
    return input.join(this.options.mode === 'word' ? ' ' : '');
  }

  /**
   * Diffs every line pair and builds the groups. Runs at most once per instance.
   */
  computeDiff() {
    if (this.diffRun) return;
    const { linesA, linesB, maxLen } = this.text2lines();
    for (let i = 0; i < maxLen; i++) {
      // A line missing on one side is compared as an empty string.
      this.lineDiff(linesA[i] ?? '', linesB[i] ?? '', i);
    }
    this.findGroups();
    this.diffRun = true;
  }

  /**
   * Diffs one pair of lines and, if they differ, appends an entry to `this.entries`.
   *
   * @param {string} a - line from the original text
   * @param {string} b - line from the modified text
   * @param {number} line - zero-based line index
   */
  lineDiff(a, b, line) {
    const { mode, caseInsensitive } = this.options;
    const baseLen = Math.max(a.length, b.length);
    const A = caseInsensitive ? a.toLowerCase() : a;
    const B = caseInsensitive ? b.toLowerCase() : b;
    let diffs = [];
    let delSize = 0;
    let insSize = 0;
    switch (mode) {
      case 'line':
        if (A !== B) {
          diffs.push({
            posA: 0,
            posB: 0,
            del: a,
            ins: b,
            size: b.length - a.length
          });
          delSize = a.length;
          insSize = b.length;
        }
        break;
      case 'word':
        diffs = this.preciseDiff(a, A, b, B);
        for (const d of diffs) {
          delSize += d.del.length;
          insSize += d.ins.length;
        }
        break;
    }
    if (!diffs.length) return;
    this.entries.push({
      line,
      diffs,
      delSize,
      insSize,
      baseLen,
      totalSize: insSize - delSize,
      magnitude: this.magnitude(delSize, insSize, baseLen)
    });
  }

  /**
   * Token-level diff of two lines using a greedy forward scan with a
   * look-ahead of up to three tokens.
   *
   * @param {string} a - original line (used for the reported text)
   * @param {string} A - comparison form of `a` (e.g. lower-cased)
   * @param {string} b - modified line (used for the reported text)
   * @param {string} B - comparison form of `b`
   * @returns {Array<{posA: number, posB: number, del: string, ins: string, size: number}>}
   *   the non-empty differences, in order
   */
  preciseDiff(a, A, b, B) {
    // Start offset of each token. NOTE: assumes tokens are separated by exactly
    // one character, so offsets drift for runs of several whitespace characters.
    const posIndex = (t) =>
      t.reduce((p, _, i) => {
        p.push(i ? p[i - 1] + t[i - 1].length + 1 : 0);
        return p;
      }, []);
    const origA = this.tokenize(a);
    const origB = this.tokenize(b);
    const tokenA = this.tokenize(A);
    const tokenB = this.tokenize(B);
    const lenA = tokenA.length;
    const lenB = tokenB.length;
    const posArrA = posIndex(origA);
    const posArrB = posIndex(origB);

    // Pass 1: collect runs of matching tokens.
    const matches = [];
    let ai = 0;
    let bi = 0;
    while (ai < lenA && bi < lenB) {
      if (tokenA[ai] === tokenB[bi]) {
        let len = 1;
        while (
          ai + len < lenA &&
          bi + len < lenB &&
          tokenA[ai + len] === tokenB[bi + len]
        ) {
          len++;
        }
        matches.push({ ai, bi, len });
        ai += len;
        bi += len;
        continue;
      }
      // Mismatch: look up to three tokens ahead on either side to resynchronise.
      let found = false;
      for (let offset = 1; offset <= 3 && !found; offset++) {
        if (ai + offset < lenA && tokenA[ai + offset] === tokenB[bi]) {
          matches.push({ ai: ai + offset, bi, len: 1 });
          ai += offset + 1;
          bi += 1;
          found = true;
        } else if (bi + offset < lenB && tokenA[ai] === tokenB[bi + offset]) {
          matches.push({ ai, bi: bi + offset, len: 1 });
          ai += 1;
          bi += offset + 1;
          found = true;
        }
      }
      if (!found) {
        ai++;
        bi++;
      }
    }

    // Pass 2: everything between consecutive matches is a difference.
    const diffs = [];
    const pushDiff = (fromA, fromB, delArr, insArr) => {
      diffs.push({
        posA: posArrA[fromA] ?? 0,
        posB: posArrB[fromB] ?? 0,
        del: this.concat(delArr),
        ins: this.concat(insArr),
        size: insArr.join('').length - delArr.join('').length
      });
    };
    let i = 0;
    let j = 0;
    for (const m of matches) {
      if (i < m.ai || j < m.bi) {
        pushDiff(i, j, origA.slice(i, m.ai), origB.slice(j, m.bi));
      }
      i = m.ai + m.len;
      j = m.bi + m.len;
    }
    if (i < lenA || j < lenB) {
      pushDiff(i, j, origA.slice(i), origB.slice(j));
    }
    return diffs.filter((d) => d.del.length > 0 || d.ins.length > 0);
  }

  /**
   * Merges entries whose context windows (line ± contextLines) overlap or are
   * adjacent into groups and stores them in `this.grouped`.
   */
  findGroups() {
    const { contextLines } = this.options;
    const addGroup = (group, start, end) => {
      const [delSize, insSize, totalSize, baseLen] = [
        'delSize',
        'insSize',
        'totalSize',
        'baseLen'
      ].map((k) => group.reduce((sum, e) => sum + e[k], 0));
      this.grouped.push({
        start,
        end,
        delSize,
        insSize,
        totalSize,
        line: group[0].line,
        entries: group,
        magnitude: this.magnitude(delSize, insSize, baseLen)
      });
    };
    let group = [];
    let start = 0;
    let end = 0;
    for (const entry of this.entries) {
      const s = Math.max(0, entry.line - contextLines);
      const e = entry.line + contextLines;
      if (!group.length || s <= end + 1) {
        if (!group.length) start = s;
        end = Math.max(end, e);
        group.push(entry);
      } else {
        addGroup(group, start, end);
        group = [entry];
        start = s;
        end = e;
      }
    }
    if (group.length) addGroup(group, start, end);
  }

  /**
   * Builds a string of '+' and '-' symbols describing the size of a change.
   *
   * The length scales with (del + ins) / baseLen, clamped to
   * 1..maxMagnitudeSymbols; the share of '+' follows ins / (del + ins).
   *
   * @param {number} del - number of deleted characters
   * @param {number} ins - number of inserted characters
   * @param {number} baseLen - reference length
   * @returns {string} the symbols, or '' when nothing changed or baseLen is 0
   */
  magnitude(del, ins, baseLen) {
    const { maxMagnitudeSymbols } = this.options;
    const total = del + ins;
    if (total === 0 || baseLen === 0) return '';
    const magLen = Math.min(
      maxMagnitudeSymbols,
      Math.max(Math.round((total / baseLen) * maxMagnitudeSymbols), 1)
    );
    const plus = Math.round((ins / total) * magLen);
    const minus = magLen - plus;
    return '+'.repeat(plus) + '-'.repeat(minus);
  }

  /**
   * Renders the diff as text.
   *
   * @param {boolean} cli - true to emit ANSI colour escapes, false for plain
   *   ASCII with `-[...]` / `+[...]` markers
   * @returns {string} the rendered lines joined with `options.lineBreak`
   */
  output(cli) {
    const {
      mode,
      contextLines,
      groupedLines,
      expandLines,
      showChangeMagnitude,
      lineBreak
    } = this.options;
    const { linesA, linesB, maxLen } = this.text2lines();
    const linePad = Math.max(4, maxLen.toString().length);
    const out = [''];

    // Index entries by line once instead of scanning the array for every
    // rendered line; the first entry for a line wins, as with Array#find.
    const entryByLine = new Map();
    for (const e of this.entries) {
      if (!entryByLine.has(e.line)) entryByLine.set(e.line, e);
    }

    const highlight = (s, ansi) => (cli ? `\x1b[${ansi}m${s}\x1b[0m` : s);
    const cy = (s) => highlight(s, '36');
    const gy = (s) => highlight(s, '90');
    const gn = (s) => highlight(s, '32');
    const rd = (s) => highlight(s, '31');
    const ye = (s) => highlight(s, '33');
    const del = (s) => (cli ? `\x1b[37;41m${s}\x1b[31;49m` : `-[${s}]`);
    const ins = (s) => (cli ? `\x1b[37;42m${s}\x1b[32;49m` : `+[${s}]`);

    // Wraps each changed span of `text` in del/ins markers.
    const mark = (text, diffs, type) => {
      if (!diffs.length || mode === 'line') return text;
      let res = '';
      let idx = 0;
      for (const d of diffs) {
        const pos = type === 'del' ? d.posA : d.posB;
        const val = type === 'del' ? d.del : d.ins;
        if (!val) continue;
        if (pos > idx) res += text.slice(idx, pos);
        res += type === 'del' ? del(val) : ins(val);
        idx = pos + val.length;
      }
      return res + text.slice(idx);
    };

    const header = (e) => {
      out.push(
        `${' '.repeat(linePad)} ${cy(`@@ -${e.line + 1},${e.delSize} +${e.line + 1},${e.insSize} @@`)} ${showChangeMagnitude ? ye(e.magnitude) : ''}`
      );
    };

    // Renders line i: as a -/+ pair when it is the changed line in focus,
    // otherwise as a grey context line. Lines empty/missing on both sides are skipped.
    const line = (i, forced) => {
      if (!linesA[i] && !linesB[i]) return;
      // When one text has fewer lines than the other, the missing side is
      // rendered as an empty string (previously this threw a TypeError in
      // word mode or printed the literal text "undefined").
      const textA = linesA[i] ?? '';
      const textB = linesB[i] ?? '';
      const entry = entryByLine.get(i);
      const lineNo = (i + 1).toString().padStart(linePad, ' ');
      if (entry && forced === i) {
        out.push(`${lineNo} ${rd(`- ${mark(textA, entry.diffs, 'del')}`)}`);
        out.push(
          `${' '.repeat(linePad)} ${gn(`+ ${mark(textB, entry.diffs, 'ins')}`)}`
        );
      } else {
        out.push(`${lineNo} ${gy(textA)}`);
      }
    };

    // Renders lines start..end (inclusive), optionally preceded by a header,
    // followed by an empty separator line. Without `forced`, every line with
    // an entry is rendered as changed.
    const block = (start, end, forced, headerEntry) => {
      if (headerEntry) header(headerEntry);
      for (let i = start; i <= end; i++) line(i, forced ?? i);
      out.push('');
    };

    // Strict comparison against true mirrors the original switch(true) dispatch.
    if (expandLines === true) {
      block(0, maxLen);
    } else if (groupedLines === true) {
      for (const group of this.grouped) {
        block(group.start, group.end, undefined, group);
      }
    } else {
      for (const entry of this.entries) {
        block(
          entry.line - contextLines,
          entry.line + contextLines,
          entry.line,
          entry
        );
      }
    }
    return out.join(lineBreak);
  }

  /** @returns {object[]} one entry per changed line */
  getStructuredDiff = () => this.entries;
  /** @returns {object[]} changed lines merged into groups */
  getGroupedDiff = () => this.grouped;
  /** @returns {string} plain ASCII rendering */
  getASCIIDiff = () => this.output(false);
  /** @returns {string} ANSI-coloured rendering */
  getCLIDiff = () => this.output(true);
}
// Expose DiffChecker as a named export of this module.
export { DiffChecker };