// tabular-data-differ
// Version: (not given)
// A very efficient library for diffing two sorted streams of tabular data, such as CSV files.
// 555 lines • 16.9 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.NullFormatWriter = exports.JsonFormatWriter = exports.CsvFormatWriter = exports.IterableFormatReader = exports.JsonFormatReader = exports.CsvFormatReader = exports.TextFormatWriter = exports.StreamFormatWriter = exports.TextFormatReader = exports.StreamFormatReader = exports.BufferedFormatReader = exports.DiffStats = exports.defaultStatusColumnName = void 0;
exports.parseJsonObj = parseJsonObj;
exports.convertJsonObjToRow = convertJsonObjToRow;
exports.parseCsvLine = parseCsvLine;
exports.serializeCsvField = serializeCsvField;
exports.serializeRowAsCsvLine = serializeRowAsCsvLine;
exports.stringComparer = stringComparer;
exports.numberComparer = numberComparer;
exports.cellComparer = cellComparer;
exports.defaultRowComparer = defaultRowComparer;
exports.roundDecimals = roundDecimals;
const streams_1 = require("./streams");
exports.defaultStatusColumnName = 'DIFF_STATUS';
/**
 * Running tally of row-diff outcomes.
 *
 * Each call to `add` counts one comparison, bumps the counter matching the
 * diff's status and refreshes `changePercent`.
 */
class DiffStats {
    constructor() {
        // Field order is kept stable because it determines the key order
        // whenever an instance is enumerated or serialized.
        this.totalComparisons = 0;
        this.totalChanges = 0;
        this.changePercent = 0;
        this.added = 0;
        this.deleted = 0;
        this.modified = 0;
        this.same = 0;
    }
    /**
     * Records one row diff.
     *
     * @param rowDiff - object whose `status` is inspected; 'added', 'deleted'
     *   and 'modified' count as changes, 'same' does not, and any other status
     *   only increments the comparison total.
     */
    add(rowDiff) {
        this.totalComparisons += 1;
        switch (rowDiff.status) {
            case 'added':
                this.added += 1;
                this.totalChanges += 1;
                break;
            case 'deleted':
                this.deleted += 1;
                this.totalChanges += 1;
                break;
            case 'modified':
                this.modified += 1;
                this.totalChanges += 1;
                break;
            case 'same':
                this.same += 1;
                break;
            default:
                break;
        }
        // Share of changed rows among all compared rows, as a percentage with two decimals.
        const changeRatio = this.totalChanges / this.totalComparisons;
        this.changePercent = roundDecimals(changeRatio * 100, 2);
    }
}
exports.DiffStats = DiffStats;
/**
 * Wraps another format reader and adds a one-row look-ahead (`peekRow`).
 *
 * The peeked row is cached until the next `readRow` call consumes it.
 */
class BufferedFormatReader {
    /**
     * @param reader - the underlying reader; must provide open, readHeader, readRow and close.
     */
    constructor(reader) {
        this.reader = reader;
        this.hasPeekedRow = false;
    }
    /** Drops any cached row and opens the underlying reader. */
    open() {
        this.peekedRow = undefined;
        this.hasPeekedRow = false;
        return this.reader.open();
    }
    /** Delegates to the underlying reader. */
    readHeader() {
        return this.reader.readHeader();
    }
    /**
     * Returns the next row without consuming it.
     * Repeated calls return the same cached value (which may be undefined).
     */
    async peekRow() {
        if (!this.hasPeekedRow) {
            this.peekedRow = await this.reader.readRow();
            this.hasPeekedRow = true;
        }
        return this.peekedRow;
    }
    /** Returns the cached row if one was peeked, otherwise reads from the underlying reader. */
    async readRow() {
        if (!this.hasPeekedRow) {
            return await this.reader.readRow();
        }
        const buffered = this.peekedRow;
        this.hasPeekedRow = false;
        this.peekedRow = undefined;
        return buffered;
    }
    /** Delegates to the underlying reader. */
    close() {
        return this.reader.close();
    }
}
exports.BufferedFormatReader = BufferedFormatReader;
/**
 * Base class for readers backed by an input stream.
 *
 * The stream is whatever `getOrCreateInputStream` (from ./streams) returns for
 * `options.stream`; `options.encoding` is stored as-is for subclasses to use.
 */
class StreamFormatReader {
    /**
     * @param options - object with a `stream` entry and an optional `encoding`.
     */
    constructor(options) {
        const { getOrCreateInputStream } = streams_1;
        this.stream = getOrCreateInputStream(options.stream);
        this.encoding = options.encoding;
    }
    /** Opens the underlying stream. */
    async open() {
        const { stream } = this;
        await stream.open();
    }
    /** Closes the underlying stream. */
    async close() {
        const { stream } = this;
        await stream.close();
    }
}
exports.StreamFormatReader = StreamFormatReader;
/**
 * Stream reader that exposes a text reader created from the underlying stream.
 *
 * The text reader only exists between `open()` and `close()`.
 */
class TextFormatReader extends StreamFormatReader {
    /**
     * The active text reader.
     *
     * @throws {Error} when the reader has not been opened (or was already closed).
     */
    get textReader() {
        if (!this._textReader) {
            throw new Error('Cannot access textReader because stream is not open');
        }
        return this._textReader;
    }
    /** Opens the stream, then creates a text reader using the configured encoding. */
    async open() {
        await super.open();
        this._textReader = this.stream.createTextReader({ encoding: this.encoding });
    }
    /**
     * Closes the text reader (if any) and then the stream.
     *
     * Safe to call on a reader that was never opened or is already closed.
     * The stream is closed even when closing the text reader fails.
     */
    async close() {
        // Read the backing field directly: the `textReader` getter throws when
        // nothing is open, which would turn every redundant close() into an error
        // and skip closing the stream.
        const activeReader = this._textReader;
        this._textReader = undefined;
        try {
            if (activeReader) {
                await activeReader.close();
            }
        }
        finally {
            await super.close();
        }
    }
}
exports.TextFormatReader = TextFormatReader;
/**
 * Base class for writers backed by an output stream.
 *
 * The stream is whatever `getOrCreateOutputStream` (from ./streams) returns for
 * `options.stream`; `options.encoding` is stored as-is for subclasses to use.
 */
class StreamFormatWriter {
    /**
     * @param options - object with a `stream` entry and an optional `encoding`.
     */
    constructor(options) {
        const { getOrCreateOutputStream } = streams_1;
        this.stream = getOrCreateOutputStream(options.stream);
        this.encoding = options.encoding;
    }
    /** Opens the underlying stream. */
    async open() {
        const { stream } = this;
        await stream.open();
    }
    /** Closes the underlying stream. */
    async close() {
        const { stream } = this;
        await stream.close();
    }
}
exports.StreamFormatWriter = StreamFormatWriter;
/**
 * Stream writer that exposes a text writer created from the underlying stream.
 *
 * The text writer only exists between `open()` and `close()`.
 */
class TextFormatWriter extends StreamFormatWriter {
    /**
     * The active text writer.
     *
     * @throws {Error} when the writer has not been opened (or was already closed).
     */
    get textWriter() {
        if (!this._textWriter) {
            throw new Error('Cannot access textWriter because stream is not open');
        }
        return this._textWriter;
    }
    /** Opens the stream, then creates a text writer using the configured encoding. */
    async open() {
        await super.open();
        this._textWriter = this.stream.createTextWriter({ encoding: this.encoding });
    }
    /**
     * Closes the text writer (if any) and then the stream.
     *
     * Safe to call on a writer that was never opened or is already closed.
     * The stream is closed even when closing the text writer fails.
     */
    async close() {
        // Read the backing field directly: the `textWriter` getter throws when
        // nothing is open, which would turn every redundant close() into an error
        // and skip closing the stream.
        const activeWriter = this._textWriter;
        this._textWriter = undefined;
        try {
            if (activeWriter) {
                await activeWriter.close();
            }
        }
        finally {
            await super.close();
        }
    }
}
exports.TextFormatWriter = TextFormatWriter;
/**
 * Reads delimited text one line at a time, parsing each line with `parseCsvLine`.
 */
class CsvFormatReader extends TextFormatReader {
    /**
     * @param options - reader options; `delimiter` defaults to ','.
     */
    constructor(options) {
        super(options);
        this.delimiter = options.delimiter ?? ',';
    }
    /**
     * Reads the next line as the header.
     *
     * @returns an object whose `columns` is the parsed line, or an empty list
     *   when the line could not be parsed into fields.
     */
    async readHeader() {
        const separator = this.delimiter;
        const headerLine = await this.textReader.readLine();
        const columns = parseCsvLine(separator, headerLine) ?? [];
        return { columns };
    }
    /**
     * Reads the next line as a data row.
     *
     * @returns whatever `parseCsvLine` yields for that line.
     */
    async readRow() {
        const separator = this.delimiter;
        const rowLine = await this.textReader.readLine();
        return parseCsvLine(separator, rowLine);
    }
}
exports.CsvFormatReader = CsvFormatReader;
/**
 * Reads a text stream holding one JSON object per line.
 *
 * The column list is taken from the keys of the first object found; that
 * object is then replayed as the first data row.
 */
class JsonFormatReader extends TextFormatReader {
    constructor(options) {
        super(options);
        this.columns = [];
    }
    /** Resets the header state and opens the underlying text reader. */
    async open() {
        this.headerObj = null;
        this.columns = [];
        await super.open();
    }
    /**
     * Locates the first object and derives the columns from its keys.
     *
     * @returns an object whose `columns` lists the keys of the first object.
     * @throws {Error} when neither of the first two lines yields an object.
     */
    async readHeader() {
        this.headerObj = parseJsonObj(await this.textReader.readLine());
        if (!this.headerObj) {
            // Nothing on the first line: it may have held only the '[' that opens
            // an array, so give the following line one chance.
            this.headerObj = parseJsonObj(await this.textReader.readLine());
        }
        if (!this.headerObj) {
            throw new Error('Expected to find at least one object');
        }
        const keys = Object.keys(this.headerObj);
        this.columns = keys;
        return { columns: keys };
    }
    /**
     * Returns the next row: first the object consumed by `readHeader`, then one
     * object per subsequent line.
     */
    async readRow() {
        const pending = this.headerObj;
        if (pending) {
            const firstRow = convertJsonObjToRow(pending, this.columns);
            this.headerObj = null;
            return firstRow;
        }
        const nextLine = await this.textReader.readLine();
        return convertJsonObjToRow(parseJsonObj(nextLine), this.columns);
    }
}
exports.JsonFormatReader = JsonFormatReader;
/**
 * Reads rows from objects produced by an async iterable.
 *
 * `options.provider` is a function that returns the async iterable; it is
 * invoked on every `open()`. The column list comes from the keys of the first
 * object, which is then replayed as the first data row.
 */
class IterableFormatReader {
    /**
     * @param options - object whose `provider` is a function returning an async iterable.
     */
    constructor(options) {
        this.columns = [];
        this.iterable = options.provider;
    }
    /**
     * Starts a fresh iteration.
     *
     * @throws {Error} synchronously, when an iteration is still in progress.
     */
    open() {
        if (this.iterator) {
            throw new Error('Reader is already open!');
        }
        this.headerObj = null;
        this.columns = [];
        this.iterator = this.iterable()[Symbol.asyncIterator]();
        return Promise.resolve();
    }
    /**
     * Fetches the first object (unless one is already pending) and derives the
     * columns from its keys.
     *
     * @throws {Error} when the iterable produces no object.
     */
    async readHeader() {
        if (!this.headerObj) {
            this.headerObj = await this.nextItem();
        }
        if (!this.headerObj) {
            throw new Error('Expected to find at least one object');
        }
        this.columns = Object.keys(this.headerObj);
        return {
            columns: this.columns,
        };
    }
    /**
     * Returns the next row: first the object consumed by `readHeader`, then one
     * object per iteration step.
     */
    async readRow() {
        if (this.headerObj) {
            const row = convertJsonObjToRow(this.headerObj, this.columns);
            this.headerObj = null;
            return row;
        }
        const obj = await this.nextItem();
        return convertJsonObjToRow(obj, this.columns);
    }
    /**
     * Ends the iteration, giving the iterator a chance to clean up.
     *
     * The iterator's `return()` produces a promise for async iterators, so it is
     * awaited: cleanup has finished when `close()` resolves, and a failing
     * cleanup rejects `close()` instead of surfacing as an unhandled rejection.
     */
    async close() {
        const activeIterator = this.iterator;
        this.iterator = undefined;
        if (activeIterator?.return) {
            await activeIterator.return();
        }
    }
    /**
     * Advances the iterator by one step.
     *
     * @returns the produced value, or undefined once the iterator is exhausted
     *   (the iterator is released at that point).
     * @throws {Error} when no iteration is in progress.
     */
    async nextItem() {
        if (!this.iterator) {
            throw new Error('You must call open before reading content!');
        }
        const res = await this.iterator.next();
        if (res.done) {
            this.iterator = undefined;
            return undefined;
        }
        return res.value;
    }
}
exports.IterableFormatReader = IterableFormatReader;
/**
 * Writes row diffs as delimited text: a status column followed by the row
 * values, optionally followed by the previous values.
 */
class CsvFormatWriter extends TextFormatWriter {
    /**
     * @param options - writer options: `delimiter` (default ','), `keepOldValues`
     *   (default false) and `statusColumnName` (default: the exported
     *   `defaultStatusColumnName`).
     */
    constructor(options) {
        super(options);
        this.delimiter = options.delimiter ?? ',';
        this.keepOldValues = options.keepOldValues ?? false;
        this.statusColumnName = options.statusColumnName ?? exports.defaultStatusColumnName;
    }
    /**
     * Writes the header line: status column, the columns themselves and, when
     * old values are kept, every column again prefixed with 'OLD_'.
     */
    writeHeader(header) {
        const names = [this.statusColumnName, ...header.columns];
        if (this.keepOldValues) {
            for (const name of header.columns) {
                names.push(`OLD_${name}`);
            }
        }
        const line = serializeRowAsCsvLine(names, this.delimiter);
        return this.textWriter.writeLine(line);
    }
    /**
     * Writes one diff line. Nothing is written when the diff carries neither
     * an old nor a new row.
     */
    async writeDiff(rowDiff) {
        const { status, oldRow, newRow } = rowDiff;
        const blanksFor = (row) => row.map(() => '');
        let cells;
        if (oldRow && newRow) {
            cells = this.keepOldValues ? [status, ...newRow, ...oldRow] : [status, ...newRow];
        }
        else if (oldRow) {
            // Only an old row: without old-value columns it fills the main
            // columns, otherwise the main columns are left blank.
            cells = this.keepOldValues ? [status, ...blanksFor(oldRow), ...oldRow] : [status, ...oldRow];
        }
        else if (newRow) {
            cells = this.keepOldValues ? [status, ...newRow, ...blanksFor(newRow)] : [status, ...newRow];
        }
        else {
            return;
        }
        await this.textWriter.writeLine(serializeRowAsCsvLine(cells, this.delimiter));
    }
    /** This format has no footer; nothing is written. */
    async writeFooter(footer) {
    }
}
exports.CsvFormatWriter = CsvFormatWriter;
/**
 * Writes row diffs as a single JSON document of the form
 * `{ "header": ..., "items": [ ... ], "footer": ... }`, one item per line.
 */
class JsonFormatWriter extends TextFormatWriter {
    /**
     * @param options - writer options; `keepOldValues` defaults to false.
     */
    constructor(options) {
        super(options);
        this.rowCount = 0;
        this.keepOldValues = options.keepOldValues ?? false;
    }
    /** Resets the item counter and writes the opening of the document. */
    writeHeader(header) {
        this.rowCount = 0;
        const serializedHeader = JSON.stringify(header);
        return this.textWriter.writeLine(`{ "header": ${serializedHeader}, "items": [`);
    }
    /**
     * Writes one item. With `keepOldValues` the item carries `new` and/or `old`
     * rows; otherwise it carries a single `data` row (the new row when present,
     * else the old one).
     */
    writeDiff(rowDiff) {
        const { status, newRow, oldRow } = rowDiff;
        const item = { status };
        if (!this.keepOldValues) {
            item.data = newRow ?? oldRow;
        }
        else {
            if (newRow) {
                item.new = newRow;
            }
            if (oldRow) {
                item.old = oldRow;
            }
        }
        // Every item except the first is preceded by a comma.
        const prefix = this.rowCount !== 0 ? ',' : '';
        this.rowCount += 1;
        return this.textWriter.writeLine(`${prefix}${JSON.stringify(item)}`);
    }
    /** Closes the items array and writes the footer. */
    writeFooter(footer) {
        const serializedFooter = JSON.stringify(footer);
        return this.textWriter.writeLine(`], "footer": ${serializedFooter}}`);
    }
}
exports.JsonFormatWriter = JsonFormatWriter;
/**
 * Writer that discards everything: every method resolves immediately without
 * producing output.
 */
class NullFormatWriter {
    /** No-op. */
    async open() {
    }
    /** No-op; the header is ignored. */
    async writeHeader(header) {
    }
    /** No-op; the row diff is ignored. */
    async writeDiff(rowDiff) {
    }
    /** No-op; the footer is ignored. */
    async writeFooter(footer) {
    }
    /** No-op. */
    async close() {
    }
}
exports.NullFormatWriter = NullFormatWriter;
/**
 * Extracts the JSON object held on one line of a line-per-object JSON array.
 *
 * A leading '[' or ',' and a trailing ']' or ',' are treated as array
 * punctuation and removed, along with any whitespace around them, before parsing.
 *
 * @param line - the text line, or undefined/null when there is no more input.
 * @returns the parsed object, or undefined when the line is missing or holds
 *   nothing but array punctuation/whitespace.
 * @throws {Error} when the remaining text is not delimited by '{' and '}'.
 * @throws {SyntaxError} when the remaining text is not valid JSON.
 */
function parseJsonObj(line) {
    if (line === undefined || line === null) {
        return undefined;
    }
    let text = line.trim();
    // Re-trim after each removal so that spacing such as `[ {...} ,` does not
    // hide the object's braces from the checks below.
    if (text.startsWith('[')) {
        text = text.substring(1).trimStart();
    }
    if (text.endsWith(']')) {
        text = text.substring(0, text.length - 1).trimEnd();
    }
    if (text.startsWith(',')) {
        text = text.substring(1).trimStart();
    }
    if (text.endsWith(',')) {
        text = text.substring(0, text.length - 1).trimEnd();
    }
    if (text === '') {
        return undefined;
    }
    if (text.startsWith('{') && text.endsWith('}')) {
        return JSON.parse(text);
    }
    throw new Error('Expected to find a JSON object');
}
/**
 * Projects an object onto an ordered list of column names.
 *
 * @param obj - the source object, or null/undefined when there is no more data.
 * @param columns - property names, in the order the row cells must appear.
 * @returns the row, or undefined when `obj` is null/undefined. Numbers,
 *   booleans and nulls are kept as-is, a missing property becomes null, and
 *   any other value is converted to a string.
 */
function convertJsonObjToRow(obj, columns) {
    if (obj === null || obj === undefined) {
        return undefined;
    }
    return columns.map(col => {
        const val = obj[col];
        if (val === undefined) {
            // A missing property is an empty cell, not the literal text "undefined".
            return null;
        }
        if (val === null || typeof val === 'number' || typeof val === 'boolean') {
            return val;
        }
        return `${val}`;
    });
}
/**
 * Splits one line of delimited text into its fields.
 *
 * A field that starts with a double quote runs up to the closing quote; inside
 * it the delimiter is literal and a doubled quote stands for one quote.
 * The delimiter is compared against single characters, so only one-character
 * delimiters are recognized.
 *
 * @param delimiter - the one-character field separator.
 * @param line - the text line; may be undefined or empty.
 * @returns the list of field values, or undefined when `line` is empty or missing.
 */
function parseCsvLine(delimiter, line) {
    if (!line) {
        return undefined;
    }
    const row = [];
    let idx = 0;
    let fieldStart = 0;
    // True when the most recently consumed token was a delimiter, i.e. an
    // (empty) field is still owed if the line ends right there. Tracking this
    // explicitly also covers a delimiter consumed right after a quoted field.
    let endsWithDelimiter = false;
    while (idx < line.length) {
        const c = line[idx];
        if (c === '"') {
            idx++;
            const startIdx = idx;
            let hasEscapedDoubleQuote = false;
            while (idx < line.length) {
                if (line[idx] === '"') {
                    if (line[idx + 1] !== '"') {
                        // closing quote
                        break;
                    }
                    // doubled quote: skip the first one, the second is skipped below
                    idx++;
                    hasEscapedDoubleQuote = true;
                }
                idx++;
            }
            let value = line.substring(startIdx, idx);
            if (hasEscapedDoubleQuote) {
                value = value.replaceAll('""', '"');
            }
            row.push(value);
            // step over the closing quote and the delimiter that may follow it
            idx++;
            endsWithDelimiter = line[idx] === delimiter;
            if (endsWithDelimiter) {
                idx++;
            }
            fieldStart = idx;
        }
        else if (c === delimiter) {
            row.push(line.substring(fieldStart, idx));
            idx++;
            fieldStart = idx;
            endsWithDelimiter = true;
        }
        else {
            idx++;
            endsWithDelimiter = false;
        }
    }
    if (fieldStart < idx) {
        row.push(line.substring(fieldStart, idx));
    }
    else if (endsWithDelimiter) {
        row.push('');
    }
    return row;
}
// Characters that always force a field to be quoted: comma, double quote and
// line breaks.
const charsToEncodeRegEx = /[,"\r\n]/;
/**
 * Serializes one cell for delimited-text output.
 *
 * @param value - the cell value; null and undefined produce an empty field.
 * @param delimiter - optional field separator in use. A string value containing
 *   it is quoted as well. Anything that is not a non-empty string is ignored,
 *   so the function can still be passed directly to `Array.prototype.map`.
 * @returns the field text; quoted, with embedded double quotes doubled, when
 *   the value contains a comma, a double quote, a line break or the delimiter.
 */
function serializeCsvField(value, delimiter) {
    if (value === null || value === undefined) {
        return '';
    }
    if (typeof value !== 'string') {
        return value.toString();
    }
    const containsDelimiter = typeof delimiter === 'string' && delimiter !== '' && value.includes(delimiter);
    if (containsDelimiter || charsToEncodeRegEx.test(value)) {
        return `"${value.replaceAll('"', '""')}"`;
    }
    return value;
}
/**
 * Serializes a row as one line of delimited text.
 *
 * @param row - the cell values.
 * @param delimiter - field separator; defaults to ','.
 * @returns the serialized fields joined by the delimiter.
 */
function serializeRowAsCsvLine(row, delimiter) {
    const separator = delimiter ?? ',';
    // The separator is forwarded so that values containing it get quoted.
    return row.map(value => serializeCsvField(value, separator)).join(separator);
}
/**
 * Orders two cell values by the plain code-unit order of their string forms.
 *
 * @param a - first value; null is treated as the empty string.
 * @param b - second value; null is treated as the empty string.
 * @returns -1, 0 or 1.
 */
function stringComparer(a, b) {
    // localeCompare is deliberately avoided: an ordered csv file produced by
    // SQLite won't use the same locale, so locale-aware ordering would disagree
    // with the input order.
    const left = a === null ? '' : a.toString();
    const right = b === null ? '' : b.toString();
    if (left < right) {
        return -1;
    }
    return left === right ? 0 : 1;
}
/**
 * Orders two cell values numerically.
 *
 * Ordering rules, from lowest to highest: null, then the empty string, then
 * values that do not parse as a number (ordered among themselves by their
 * string form), then numbers by value. Two booleans are ordered false < true.
 * Values that parse to the same number compare equal even when their text
 * differs (e.g. '1.0' and '1').
 *
 * @param a - first value (number, boolean, string or null).
 * @param b - second value (number, boolean, string or null).
 * @returns -1, 0 or 1; swapping the arguments always flips the sign.
 */
function numberComparer(a, b) {
    if (a === b) {
        return 0;
    }
    if (a === null) {
        return -1;
    }
    if (b === null) {
        return 1;
    }
    if (typeof a === 'number' && typeof b === 'number') {
        // NaN never satisfies `<`, so it is ranked explicitly (below every
        // number) to keep the result independent of the argument order.
        const aIsNaN = Number.isNaN(a);
        const bIsNaN = Number.isNaN(b);
        if (aIsNaN || bIsNaN) {
            if (aIsNaN && bIsNaN) {
                return 0;
            }
            return aIsNaN ? -1 : 1;
        }
        return a < b ? -1 : 1;
    }
    if (typeof a === 'boolean' && typeof b === 'boolean') {
        return a < b ? -1 : 1;
    }
    const strA = a.toString();
    const strB = b.toString();
    if (strA === strB) {
        return 0;
    }
    if (strA === '') {
        return -1;
    }
    if (strB === '') {
        return 1;
    }
    const numA = parseFloat(strA);
    const numB = parseFloat(strB);
    const aNotNumeric = Number.isNaN(numA);
    const bNotNumeric = Number.isNaN(numB);
    if (aNotNumeric && bNotNumeric) {
        // Neither side is numeric: fall back to the string order so the result
        // stays antisymmetric (the strings are known to differ here).
        return strA < strB ? -1 : 1;
    }
    if (aNotNumeric) {
        return -1;
    }
    if (bNotNumeric) {
        return 1;
    }
    if (numA === numB) {
        return 0;
    }
    return numA < numB ? -1 : 1;
}
/**
 * Compares two cell values: numerically when both are numbers, otherwise by
 * their string forms.
 *
 * @param a - first cell value.
 * @param b - second cell value.
 * @returns the result of `numberComparer` or `stringComparer`.
 */
function cellComparer(a, b) {
    const bothNumbers = typeof a === 'number' && typeof b === 'number';
    const compare = bothNumbers ? numberComparer : stringComparer;
    return compare(a, b);
}
/**
 * Compares two rows column by column and returns the first non-zero result.
 *
 * @param columns - non-empty list of column descriptors. Each provides
 *   `oldIndex` (cell position in `a`), `newIndex` (cell position in `b`), an
 *   optional `comparer` (defaults to `cellComparer`) and an optional
 *   `sortDirection`; 'DESC' negates the comparer's result.
 * @param a - the first row, or undefined when there is none.
 * @param b - the second row, or undefined when there is none.
 * @returns 0 when both rows are missing or equal on every column; 1 when only
 *   `a` is missing; -1 when only `b` is missing; otherwise the first non-zero
 *   column comparison.
 * @throws {Error} when `columns` is empty.
 */
function defaultRowComparer(columns, a, b) {
    if (columns.length === 0) {
        throw new Error('Expected to have at least one entry in the columns parameter');
    }
    const hasA = a !== undefined;
    const hasB = b !== undefined;
    if (!hasA || !hasB) {
        if (hasA === hasB) {
            return 0;
        }
        // A missing row is ordered after a present one.
        return hasA ? -1 : 1;
    }
    for (const { oldIndex, newIndex, comparer, sortDirection } of columns) {
        // Cells absent from a row are compared as null.
        const left = a[oldIndex] ?? null;
        const right = b[newIndex] ?? null;
        const compare = comparer ?? cellComparer;
        const delta = compare(left, right);
        if (delta !== 0) {
            return sortDirection === 'DESC' ? -delta : delta;
        }
    }
    return 0;
}
/**
 * Rounds a number to a given count of decimal places.
 *
 * @param value - the number to round.
 * @param decimals - how many decimal places to keep.
 * @returns `value` scaled by 10^decimals, rounded with Math.round, then scaled back.
 */
function roundDecimals(value, decimals) {
    const scale = Math.pow(10, decimals);
    const scaled = Math.round(value * scale);
    return scaled / scale;
}
//# sourceMappingURL=formats.js.map