UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well performing package for calculating string similarity

298 lines (296 loc) 8.91 kB
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License

/**
 * Compares two texts line by line and renders the differences either as
 * structured data or as a unified-diff-like text (plain ASCII or ANSI colored).
 *
 * The diff is computed once, in the constructor. Lines are paired by index
 * (line i of `a` against line i of `b`); there is no line re-alignment.
 */
class DiffChecker {
  a;
  b;
  options;
  entries = [];
  grouped = [];
  diffRun = false;

  /**
   * @param {string} a - Original text.
   * @param {string} b - Modified text.
   * @param {object} [opt] - Overrides for the default options below.
   */
  constructor(a, b, opt = {}) {
    this.a = a;
    this.b = b;
    this.options = {
      mode: 'word',
      caseInsensitive: false,
      contextLines: 1,
      groupedLines: true,
      expandLines: false,
      showChangeMagnitude: true,
      maxMagnitudeSymbols: 5,
      lineBreak: '\n',
      ...opt
    };
    this.computeDiff();
  }

  /**
   * Trims both texts and splits them on LF / CRLF.
   *
   * @returns {{ linesA: string[], linesB: string[], maxLen: number }}
   */
  text2lines() {
    const linesA = this.a.trim().split(/\r?\n/);
    const linesB = this.b.trim().split(/\r?\n/);
    return { linesA, linesB, maxLen: Math.max(linesA.length, linesB.length) };
  }

  /**
   * Splits a single line into comparison tokens according to `options.mode`.
   *
   * @param {string} input - One line of text.
   * @returns {string[]|undefined} Tokens; `undefined` for an unknown mode.
   */
  tokenize(input) {
    switch (this.options.mode) {
      case 'line':
        return [input];
      case 'word':
        return input.split(/\s+/);
    }
  }

  /**
   * Joins tokens back into a string (space separated in `word` mode).
   *
   * @param {string[]} input - Tokens to join.
   * @returns {string}
   */
  concat(input) {
    return input.join(this.options.mode === 'word' ? ' ' : '');
  }

  /**
   * Runs the diff once: fills `this.entries` and `this.grouped`.
   * Subsequent calls are no-ops.
   */
  computeDiff() {
    if (this.diffRun) return;
    const { linesA, linesB, maxLen } = this.text2lines();
    // A line missing on one side is compared as an empty string.
    for (let i = 0; i < maxLen; i++) this.lineDiff(linesA[i] || '', linesB[i] || '', i);
    this.findGroups();
    this.diffRun = true;
  }

  /**
   * Diffs one pair of lines and, if they differ, appends an entry to
   * `this.entries`.
   *
   * @param {string} a - Line from the original text.
   * @param {string} b - Line from the modified text.
   * @param {number} line - Zero-based line index.
   */
  lineDiff(a, b, line) {
    const { mode, caseInsensitive } = this.options;
    const baseLen = Math.max(a.length, b.length);
    let A = a;
    let B = b;
    if (caseInsensitive) {
      A = a.toLowerCase();
      B = b.toLowerCase();
    }
    let diffs = [];
    let delSize = 0;
    let insSize = 0;
    switch (mode) {
      case 'line':
        if (A !== B) {
          diffs.push({ posA: 0, posB: 0, del: a, ins: b, size: b.length - a.length });
          delSize = a.length;
          insSize = b.length;
        }
        break;
      case 'word':
        diffs = this.preciseDiff(a, A, b, B);
        for (const d of diffs) {
          delSize += d.del.length;
          insSize += d.ins.length;
        }
        break;
    }
    if (diffs.length) {
      this.entries.push({
        line,
        diffs,
        delSize,
        insSize,
        baseLen,
        totalSize: insSize - delSize,
        magnitude: this.magnitude(delSize, insSize, baseLen)
      });
    }
  }

  /**
   * Token-level diff of one line pair. Tokens are matched greedily from left
   * to right; on a mismatch, up to three tokens ahead on either side are
   * probed for a re-synchronisation point.
   *
   * @param {string} a - Original line (used for the reported text).
   * @param {string} A - Original line as compared (possibly lower-cased).
   * @param {string} b - Modified line (used for the reported text).
   * @param {string} B - Modified line as compared (possibly lower-cased).
   * @returns {{ posA: number, posB: number, del: string, ins: string, size: number }[]}
   */
  preciseDiff(a, A, b, B) {
    // Character offset of every token, assuming tokens are separated by exactly
    // one character. NOTE(review): offsets drift on lines with leading or
    // repeated whitespace, which can misplace highlights in output().
    const posIndex = (t) =>
      t.reduce((p, _, i) => {
        p.push(i ? p[i - 1] + t[i - 1].length + 1 : 0);
        return p;
      }, []);

    const origA = this.tokenize(a);
    const origB = this.tokenize(b);
    const tokenA = this.tokenize(A);
    const tokenB = this.tokenize(B);
    const lenA = tokenA.length;
    const lenB = tokenB.length;
    const posArrA = posIndex(origA);
    const posArrB = posIndex(origB);

    // Pass 1: collect runs of matching tokens.
    const matches = [];
    let ai = 0;
    let bi = 0;
    while (ai < lenA && bi < lenB) {
      if (tokenA[ai] === tokenB[bi]) {
        let len = 1;
        while (
          ai + len < lenA &&
          bi + len < lenB &&
          tokenA[ai + len] === tokenB[bi + len]
        ) {
          len++;
        }
        matches.push({ ai, bi, len });
        ai += len;
        bi += len;
      } else {
        let found = false;
        for (let offset = 1; offset <= 3 && !found; offset++) {
          if (ai + offset < lenA && tokenA[ai + offset] === tokenB[bi]) {
            matches.push({ ai: ai + offset, bi, len: 1 });
            ai += offset + 1;
            bi += 1;
            found = true;
          } else if (bi + offset < lenB && tokenA[ai] === tokenB[bi + offset]) {
            matches.push({ ai, bi: bi + offset, len: 1 });
            ai += 1;
            bi += offset + 1;
            found = true;
          }
        }
        if (!found) {
          ai++;
          bi++;
        }
      }
    }

    // Pass 2: everything between two matches is a deletion and/or insertion.
    const diffs = [];
    const pushDiff = (i, j, delArr, insArr) => {
      diffs.push({
        posA: posArrA[i] ?? 0,
        posB: posArrB[j] ?? 0,
        del: this.concat(delArr),
        ins: this.concat(insArr),
        size: insArr.join('').length - delArr.join('').length
      });
    };
    let i = 0;
    let j = 0;
    for (const m of matches) {
      if (i < m.ai || j < m.bi) pushDiff(i, j, origA.slice(i, m.ai), origB.slice(j, m.bi));
      i = m.ai + m.len;
      j = m.bi + m.len;
    }
    if (i < lenA || j < lenB) pushDiff(i, j, origA.slice(i), origB.slice(j));

    return diffs.filter((d) => d.del.length > 0 || d.ins.length > 0);
  }

  /**
   * Merges entries whose context windows (`contextLines` before and after)
   * touch or overlap into groups and stores them in `this.grouped`.
   */
  findGroups() {
    const { contextLines } = this.options;
    const addGroup = (group, start, end) => {
      const [delSize, insSize, totalSize, baseLen] = [
        'delSize',
        'insSize',
        'totalSize',
        'baseLen'
      ].map((k) => group.reduce((sum, e) => sum + e[k], 0));
      this.grouped.push({
        start,
        end,
        delSize,
        insSize,
        totalSize,
        line: group[0].line,
        entries: group,
        magnitude: this.magnitude(delSize, insSize, baseLen)
      });
    };

    let group = [];
    let start = 0;
    let end = 0;
    for (const entry of this.entries) {
      const s = Math.max(0, entry.line - contextLines);
      const e = entry.line + contextLines;
      if (!group.length || s <= end + 1) {
        if (!group.length) start = s;
        end = Math.max(end, e);
        group.push(entry);
      } else {
        addGroup(group, start, end);
        group = [entry];
        start = s;
        end = e;
      }
    }
    if (group.length) addGroup(group, start, end);
  }

  /**
   * Builds a `+`/`-` bar describing how large a change is relative to
   * `baseLen` and how it splits between insertions and deletions.
   *
   * @param {number} del - Deleted character count.
   * @param {number} ins - Inserted character count.
   * @param {number} baseLen - Reference length the change is scaled against.
   * @returns {string} Between 1 and `maxMagnitudeSymbols` symbols, or `''`
   *   when there is no change or no reference length.
   */
  magnitude(del, ins, baseLen) {
    const { maxMagnitudeSymbols } = this.options;
    const total = del + ins;
    if (total === 0 || baseLen === 0) return '';
    const magLen = Math.min(
      maxMagnitudeSymbols,
      Math.max(Math.round((total / baseLen) * maxMagnitudeSymbols), 1)
    );
    const plus = Math.round((ins / total) * magLen);
    const minus = magLen - plus;
    return '+'.repeat(plus) + '-'.repeat(minus);
  }

  /**
   * Renders the diff as text.
   *
   * @param {boolean} cli - When truthy, wraps parts in ANSI escape sequences;
   *   otherwise changes are marked as `-[…]` / `+[…]`.
   * @returns {string} Output lines joined with `options.lineBreak`.
   */
  output(cli) {
    const {
      mode,
      contextLines,
      groupedLines,
      expandLines,
      showChangeMagnitude,
      lineBreak
    } = this.options;
    const { linesA, linesB, maxLen } = this.text2lines();
    const linePad = Math.max(4, maxLen.toString().length);
    const out = [''];

    // One lookup table instead of scanning this.entries for every rendered line.
    const entryByLine = new Map();
    for (const e of this.entries) {
      if (!entryByLine.has(e.line)) entryByLine.set(e.line, e);
    }

    const highlight = (s, ansi) => (cli ? `\x1b[${ansi}m${s}\x1b[0m` : s);
    const cy = (s) => highlight(s, '36');
    const gy = (s) => highlight(s, '90');
    const gn = (s) => highlight(s, '32');
    const rd = (s) => highlight(s, '31');
    const ye = (s) => highlight(s, '33');
    const del = (s) => (cli ? `\x1b[37;41m${s}\x1b[31;49m` : `-[${s}]`);
    const ins = (s) => (cli ? `\x1b[37;42m${s}\x1b[32;49m` : `+[${s}]`);

    // Wraps the changed parts of one line in del()/ins() markers.
    const mark = (text, diffs, type) => {
      if (!diffs.length || mode === 'line') return text;
      let res = '';
      let idx = 0;
      for (const d of diffs) {
        const pos = type === 'del' ? d.posA : d.posB;
        const val = type === 'del' ? d.del : d.ins;
        if (!val) continue;
        if (pos > idx) res += text.slice(idx, pos);
        res += type === 'del' ? del(val) : ins(val);
        idx = pos + val.length;
      }
      return res + text.slice(idx);
    };

    const header = (e) => {
      out.push(
        `${' '.repeat(linePad)} ${cy(`@@ -${e.line + 1},${e.delSize} +${e.line + 1},${e.insSize} @@`)} ${showChangeMagnitude ? ye(e.magnitude) : ''}`
      );
    };

    const line = (i, forced) => {
      const rawA = linesA[i];
      const rawB = linesB[i];
      // Lines that are empty or absent on both sides are not rendered.
      if (!rawA && !rawB) return;
      // When the texts differ in line count one side is undefined here; render
      // it as an empty line instead of crashing in mark() or printing
      // the literal text "undefined".
      const textA = rawA ?? '';
      const textB = rawB ?? '';
      const entry = entryByLine.get(i);
      const lineNo = (i + 1).toString().padStart(linePad, ' ');
      if (entry && forced === i) {
        out.push(`${lineNo} ${rd(`- ${mark(textA, entry.diffs, 'del')}`)}`);
        out.push(`${' '.repeat(linePad)} ${gn(`+ ${mark(textB, entry.diffs, 'ins')}`)}`);
      } else {
        out.push(`${lineNo} ${gy(textA)}`);
      }
    };

    // Renders lines start..end; only the line equal to `forced` (or every line
    // when `forced` is omitted) is shown as a -/+ pair.
    const block = (start, end, forced, headerEntry) => {
      if (headerEntry) header(headerEntry);
      for (let i = start; i <= end; i++) line(i, forced ?? i);
      out.push('');
    };

    // Strict comparison on purpose: only a literal `true` enables a mode.
    if (expandLines === true) {
      block(0, maxLen);
    } else if (groupedLines === true) {
      for (const group of this.grouped) block(group.start, group.end, undefined, group);
    } else {
      for (const entry of this.entries) {
        block(entry.line - contextLines, entry.line + contextLines, entry.line, entry);
      }
    }

    return out.join(lineBreak);
  }

  /** @returns {object[]} Per-line diff entries. */
  getStructuredDiff = () => this.entries;

  /** @returns {object[]} Diff entries merged into context groups. */
  getGroupedDiff = () => this.grouped;

  /** @returns {string} Plain-text rendering. */
  getASCIIDiff = () => this.output(false);

  /** @returns {string} ANSI-colored rendering. */
  getCLIDiff = () => this.output(true);
}

export { DiffChecker };