UNPKG

tabular-data-differ

Version:

A very efficient library for diffing two sorted streams of tabular data, such as CSV files.

506 lines 22.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const formats_1 = require("./formats"); const streams_1 = require("./streams"); describe('formats', () => { describe('parsing', () => { test('a,b,c', () => { const row = (0, formats_1.parseCsvLine)(',', 'a,b,c'); expect(row).toEqual(['a', 'b', 'c']); }); test('empty list', () => { const row = (0, formats_1.parseCsvLine)(',', ',,'); expect(row).toEqual(['', '', '']); }); test('with spaces', () => { const row = (0, formats_1.parseCsvLine)(',', 'a a,b b,c c'); expect(row).toEqual(['a a', 'b b', 'c c']); }); test('with comma', () => { const row = (0, formats_1.parseCsvLine)(',', '"1,1","2,2","3,3"'); expect(row).toEqual(['1,1', '2,2', '3,3']); }); test('with double quote', () => { const row = (0, formats_1.parseCsvLine)(',', '"a ""b"" c","""a b c""","3 "","" 3"'); expect(row).toEqual(['a "b" c', '"a b c"', '3 "," 3']); }); test('with tab separator', () => { const row = (0, formats_1.parseCsvLine)('\t', 'a\tb\tc'); expect(row).toEqual(['a', 'b', 'c']); }); test('with quoted tab separator', () => { const row = (0, formats_1.parseCsvLine)('\t', 'a\t"b\tb"\tc'); expect(row).toEqual(['a', 'b\tb', 'c']); }); test('with trailing comma', () => { const row = (0, formats_1.parseCsvLine)(',', 'a,b,c,'); expect(row).toEqual(['a', 'b', 'c', '']); }); }); describe('JSON reader', () => { test('single compact row', async () => { const stream = new streams_1.ArrayInputStream([ '[{"id": "1","a":"a1","b":"b1","c":"c1"}]', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('single indented row', async () => { const stream = new streams_1.ArrayInputStream([ '[', ' {"id": "1","a":"a1","b":"b1","c":"c1"}', ']' ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('inlined brackets, with trailing comma', async () => { const stream = new streams_1.ArrayInputStream([ '[{"id": "1","a":"a1","b":"b1","c":"c1"},', '{"id": "2","a":"a2","b":"b2","c":"c2"},', '{"id": "3","a":"a3","b":"b3","c":"c3"}]', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const row2 = await reader.readRow(); expect(row2).toEqual(['2', 'a2', 'b2', 'c2']); const row3 = await reader.readRow(); expect(row3).toEqual(['3', 'a3', 'b3', 'c3']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('outlined brackets, with trailing comma', async () => { const stream = new streams_1.ArrayInputStream([ '[', ' {"id": "1","a":"a1","b":"b1","c":"c1"},', ' {"id": "2","a":"a2","b":"b2","c":"c2"},', ' {"id": "3","a":"a3","b":"b3","c":"c3"}', ']', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const row2 = await reader.readRow(); expect(row2).toEqual(['2', 'a2', 'b2', 'c2']); const row3 = await reader.readRow(); expect(row3).toEqual(['3', 'a3', 'b3', 'c3']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('inlined brackets, with preceding comma', async () => { const stream = new streams_1.ArrayInputStream([ '[{"id": "1","a":"a1","b":"b1","c":"c1"}', ',{"id": "2","a":"a2","b":"b2","c":"c2"}', ',{"id": "3","a":"a3","b":"b3","c":"c3"}]', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const row2 = await reader.readRow(); expect(row2).toEqual(['2', 'a2', 'b2', 'c2']); const row3 = await reader.readRow(); expect(row3).toEqual(['3', 'a3', 'b3', 'c3']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('outlined brackets, with preceding comma', async () => { const stream = new streams_1.ArrayInputStream([ '[', ' {"id": "1","a":"a1","b":"b1","c":"c1"}', ' ,{"id": "2","a":"a2","b":"b2","c":"c2"}', ' ,{"id": "3","a":"a3","b":"b3","c":"c3"}', ']', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c']); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); const row2 = await reader.readRow(); expect(row2).toEqual(['2', 'a2', 'b2', 'c2']); const row3 = await reader.readRow(); expect(row3).toEqual(['3', 'a3', 'b3', 'c3']); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); test('reading a closed stream should fail', async () => { const stream = new streams_1.ArrayInputStream([ '', ]); const reader = new formats_1.JsonFormatReader({ stream }); await expect(async () => { await reader.readHeader(); }).rejects.toThrowError('Cannot access textReader because stream is not open'); }); test('writing to a closed stream should fail', async () => { const writer = new formats_1.JsonFormatWriter({ stream: './output/files/output.json' }); await expect(async () => { await writer.writeHeader({ columns: ['id', 'name'] }); }).rejects.toThrowError('Cannot access textWriter because stream is not open'); }); test('empty string should fail', async () => { const stream = new streams_1.ArrayInputStream([ '', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); await expect(async () => { await reader.readHeader(); }).rejects.toThrowError('Expected to find at least one object'); }); test('empty stream should fail', async () => { const stream = new streams_1.ArrayInputStream([]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); await expect(async () => { await reader.readHeader(); }).rejects.toThrowError('Expected to find at least one object'); }); test('row should contain an object or fail, while reading the header', async () => { const stream = new streams_1.ArrayInputStream([ '123', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); await expect(async () => { await reader.readHeader(); }).rejects.toThrowError('Expected to find a JSON object'); }); test('row should contain an object or fail, while reading the header', async () => { const stream = new streams_1.ArrayInputStream([ '[', ' {"id": "1","a":"a1","b":"b1","c":"c1"},', ' 123,', ' {"id": "3","a":"a3","b":"b3","c":"c3"}', ']', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); await reader.readHeader(); const row1 = await reader.readRow(); expect(row1).toEqual(['1', 'a1', 'b1', 'c1']); await expect(async () => { await reader.readRow(); }).rejects.toThrowError('Expected to find a JSON object'); }); test('should not convert object values to string', async () => { const stream = new streams_1.ArrayInputStream([ '[{"id": 1,"a":"a1","b":true,"c":3.14,"d":null}]', ]); const reader = new formats_1.JsonFormatReader({ stream }); await reader.open(); const header = await reader.readHeader(); expect(header.columns).toEqual(['id', 'a', 'b', 'c', 'd']); const row1 = await reader.readRow(); expect(row1).toEqual([1, 'a1', true, 3.14, null]); const done = await reader.readRow(); expect(done).toBeUndefined(); await reader.close(); }); }); // TODO: write test for JsonFormatWriter and test row with non string values being properly serialized as json record describe('formatting', () => { test('a,b,c', () => { const txt = (0, formats_1.serializeRowAsCsvLine)(['a', 'b', 'c']); expect(txt).toEqual('a,b,c'); }); test('empty list', () => { const txt = (0, formats_1.serializeRowAsCsvLine)(['', '', '']); expect(txt).toEqual(',,'); }); test('with space, comma, double quote', () => { const txt = (0, formats_1.serializeRowAsCsvLine)(['a":"a', 'b b', 'c,c']); expect(txt).toEqual('"a"":""a",b b,"c,c"'); }); }); describe('comparison', () => { test('should have at least one key', () => { expect(() => (0, formats_1.defaultRowComparer)([], undefined, undefined)).toThrowError('Expected to have at least one entry in the columns parameter'); }); describe('undefined rows', () => { const keys = [{ name: 'id', oldIndex: 0, newIndex: 0, }]; test('equal', () => { const res = (0, formats_1.defaultRowComparer)(keys, undefined, undefined); expect(res).toBe(0); }); test('less than', () => { const a = ['2', 'a']; const b = undefined; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(-1); }); test('greater than', () => { const a = undefined; const b = ['2', 'a']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(1); }); }); describe('single pk column', () => { const keys = [{ name: 'id', oldIndex: 0, newIndex: 0, }]; test('equal', () => { const a = ['1', 'a']; const b = ['1', 'b']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(0); }); test('less than', () => { const a = ['1', 'b']; const b = ['2', 'a']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(-1); }); test('greater than', () => { const a = ['2', 'a']; const b = ['1', 'b']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(1); }); }); describe('2 pk columns', () => { const keys = [ { name: 'i1d', oldIndex: 0, newIndex: 0, }, { name: 'id2', oldIndex: 1, newIndex: 1, }, ]; test('equal', () => { const a = ['1', '1', 'c']; const b = ['1', '1', 'd']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(0); }); test('less than', () => { const a = ['1', 'a', 'c']; const b = ['1', 'b', 'c']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(-1); }); test('less than undefined field', () => { const a = ['1']; const b = ['1', 'b', 'c']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(-1); }); test('greater than', () => { const a = ['1', 'b', 'c']; const b = ['1', 'a', 'c']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(1); }); test('greater than undefined field', () => { const a = ['1', 'b', 'c']; const b = ['1']; const res = (0, formats_1.defaultRowComparer)(keys, a, b); expect(res).toBe(1); }); }); test('number comparison', () => { expect((0, formats_1.numberComparer)(null, null)).toBe(0); expect((0, formats_1.numberComparer)(1, null)).toBe(1); expect((0, formats_1.numberComparer)(null, 1)).toBe(-1); expect((0, formats_1.numberComparer)(1, '')).toBe(1); expect((0, formats_1.numberComparer)('', 1)).toBe(-1); expect((0, formats_1.numberComparer)(0, 0)).toBe(0); expect((0, formats_1.numberComparer)('0', 0)).toBe(0); expect((0, formats_1.numberComparer)(0, '0')).toBe(0); expect((0, formats_1.numberComparer)(1.1, 1.1)).toBe(0); expect((0, formats_1.numberComparer)(1.1, 1.2)).toBe(-1); expect((0, formats_1.numberComparer)(1.2, 1.1)).toBe(1); expect((0, formats_1.numberComparer)(-10, 0)).toBe(-1); expect((0, formats_1.numberComparer)(0, -10)).toBe(1); expect((0, formats_1.numberComparer)(0, 10)).toBe(-1); expect((0, formats_1.numberComparer)(10, 0)).toBe(1); expect((0, formats_1.numberComparer)(null, '')).toBe(-1); expect((0, formats_1.numberComparer)('', null)).toBe(1); expect((0, formats_1.numberComparer)(null, true)).toBe(-1); expect((0, formats_1.numberComparer)(true, null)).toBe(1); expect((0, formats_1.numberComparer)(true, true)).toBe(0); expect((0, formats_1.numberComparer)(true, false)).toBe(1); expect((0, formats_1.numberComparer)(false, true)).toBe(-1); expect((0, formats_1.numberComparer)(null, 'abc')).toBe(-1); expect((0, formats_1.numberComparer)('abc', null)).toBe(1); expect((0, formats_1.numberComparer)('1', '1')).toBe(0); expect((0, formats_1.numberComparer)('1', '2')).toBe(-1); expect((0, formats_1.numberComparer)('2', '1')).toBe(1); expect((0, formats_1.numberComparer)('2', '11')).toBe(-1); expect((0, formats_1.numberComparer)('11', '2')).toBe(1); expect((0, formats_1.numberComparer)('1.1', '1.1')).toBe(0); expect((0, formats_1.numberComparer)('1.1', '1.2')).toBe(-1); expect((0, formats_1.numberComparer)('1.2', '1.1')).toBe(1); expect((0, formats_1.numberComparer)('x1.1', 'x1.1')).toBe(0); expect((0, formats_1.numberComparer)('x1.1', '1.1')).toBe(-1); expect((0, formats_1.numberComparer)('1.1', 'x1.1')).toBe(1); }); }); describe('Iterable source', () => { test('should read all objects', async () => { const format = new formats_1.IterableFormatReader({ provider: someAsyncSource, }); await format.open(); const header = await format.readHeader(); expect(header.columns).toEqual(['id', 'name', 'age']); const row1 = await format.readRow(); expect(row1).toEqual([1, 'John', 33]); const row2 = await format.readRow(); expect(row2).toEqual([2, 'Mary', 22]); const row3 = await format.readRow(); expect(row3).toEqual([3, 'Cindy', 44]); const row4 = await format.readRow(); expect(row4).toBeUndefined(); await format.close(); }); test('should read first object', async () => { const format = new formats_1.IterableFormatReader({ provider: someAsyncSource, }); await format.open(); const header = await format.readHeader(); expect(header.columns).toEqual(['id', 'name', 'age']); const row1 = await format.readRow(); expect(row1).toEqual([1, 'John', 33]); await format.close(); }); test('should re-open', async () => { const format = new formats_1.IterableFormatReader({ provider: someAsyncSource, }); await format.open(); const header = await format.readHeader(); expect(header.columns).toEqual(['id', 'name', 'age']); const row1 = await format.readRow(); expect(row1).toEqual([1, 'John', 33]); await format.close(); await format.open(); const headerBis = await format.readHeader(); expect(headerBis.columns).toEqual(['id', 'name', 'age']); const row1Bis = await format.readRow(); expect(row1Bis).toEqual([1, 'John', 33]); await format.close(); }); test('should open first', async () => { const format = new formats_1.IterableFormatReader({ provider: someAsyncSource, }); await expect(async () => { await format.readHeader(); }).rejects.toThrowError('You must call open before reading content!'); }); test('should open once', async () => { const format = new formats_1.IterableFormatReader({ provider: someAsyncSource, }); await format.open(); await expect(async () => { await format.open(); }).rejects.toThrowError('Reader is already open!'); }); test('should not be empty', async () => { const format = new formats_1.IterableFormatReader({ provider: () => someAsyncSource(0), }); await format.open(); try { await expect(async () => { await format.readHeader(); }).rejects.toThrowError('Expected to find at least one object'); } finally { format.close(); } }); }); describe('BufferedFormatReader', () => { test('should peek rows', async () => { const format = new formats_1.BufferedFormatReader(new formats_1.IterableFormatReader({ provider: someAsyncSource, })); await format.open(); const header = await format.readHeader(); expect(header.columns).toEqual(['id', 'name', 'age']); const row1 = await format.readRow(); expect(row1).toEqual([1, 'John', 33]); const row2 = await format.peekRow(); expect(row2).toEqual([2, 'Mary', 22]); const row2b = await format.peekRow(); expect(row2b).toBe(row2); const row2c = await format.readRow(); expect(row2c).toBe(row2); const row3 = await format.readRow(); expect(row3).toEqual([3, 'Cindy', 44]); const row4 = await format.readRow(); expect(row4).toBeUndefined(); await format.close(); }); }); }); async function* someAsyncSource(limit) { let items = [ { id: 1, name: 'John', age: 33, }, { id: 2, name: 'Mary', age: 22, }, { id: 3, name: 'Cindy', age: 44, }, ]; if (limit !== undefined) { items = items.slice(0, limit); } for (const item of items) { yield item; } } //# sourceMappingURL=formats.test.js.map