openrxiv-utils
Version:
Utility functions for bioRxiv operations including URL parsing and DOI handling
510 lines (459 loc) • 21.6 kB
text/typescript
import { describe, it, expect } from 'vitest';
import { getFolderStructure, normalizeBatch } from './folder-structure.js';
describe('Content Structure Utilities', () => {
describe('normalizeBatch', () => {
describe('bioRxiv server (default)', () => {
  // Accepted inputs. Each row is [raw batch value, expected normalized name]
  // when normalizeBatch is called with the 'biorxiv' server.
  it.each([
    // Numeric values: one-digit numbers are zero-padded, 100 keeps three digits
    [1, 'Batch_01'],
    [5, 'Batch_05'],
    [10, 'Batch_10'],
    [25, 'Batch_25'],
    [100, 'Batch_100'],
    // Digit-only strings, with and without a single leading zero
    ['1', 'Batch_01'],
    ['5', 'Batch_05'],
    ['10', 'Batch_10'],
    ['01', 'Batch_01'],
    ['05', 'Batch_05'],
    // "batch" prefix in any letter case, joined directly or with "-" / "_"
    ['batch1', 'Batch_01'],
    ['batch-1', 'Batch_01'],
    ['batch_1', 'Batch_01'],
    ['Batch1', 'Batch_01'],
    ['BATCH-1', 'Batch_01'],
    ['BATCH_1', 'Batch_01'],
    // Two leading zeros still collapse to the two-digit form
    ['001', 'Batch_01'],
    ['005', 'Batch_05'],
    ['010', 'Batch_10'],
    ['batch001', 'Batch_01'],
    ['batch-001', 'Batch_01'],
    ['batch_001', 'Batch_01'],
    // Inputs that already carry a zero-padded number after the prefix
    ['batch-01', 'Batch_01'],
    ['batch_01', 'Batch_01'],
    ['Batch_01', 'Batch_01'],
    ['BATCH_01', 'Batch_01'],
  ])('should normalize "%s" to "%s" for bioRxiv', (rawBatch, normalized) => {
    const actual = normalizeBatch(rawBatch, 'biorxiv');
    expect(actual).toBe(normalized);
  });

  // Rejected string inputs. Each row is [raw value, expected error message];
  // the message echoes the raw value exactly as it was passed in.
  it.each([
    ['0', 'Invalid batch format: 0. Expected a positive number or batch identifier.'],
    ['-1', 'Invalid batch format: -1. Expected a positive number or batch identifier.'],
    ['abc', 'Invalid batch format: abc. Expected a positive number or batch identifier.'],
    [
      'batch-abc',
      'Invalid batch format: batch-abc. Expected a positive number or batch identifier.',
    ],
    ['', 'Invalid batch format: . Expected a positive number or batch identifier.'],
    ['batch-', 'Invalid batch format: batch-. Expected a positive number or batch identifier.'],
    ['batch_', 'Invalid batch format: batch_. Expected a positive number or batch identifier.'],
  ])('should throw error for invalid input "%s" for bioRxiv', (rawBatch, message) => {
    const attempt = () => normalizeBatch(rawBatch, 'biorxiv');
    expect(attempt).toThrow(message);
  });

  // Rejected numeric inputs: zero and negative numbers.
  it.each([
    [0, 'Invalid batch format: 0. Expected a positive number or batch identifier.'],
    [-1, 'Invalid batch format: -1. Expected a positive number or batch identifier.'],
    [-100, 'Invalid batch format: -100. Expected a positive number or batch identifier.'],
  ])('should throw error for invalid number input %d for bioRxiv', (rawBatch, message) => {
    const attempt = () => normalizeBatch(rawBatch, 'biorxiv');
    expect(attempt).toThrow(message);
  });
});
describe('medRxiv server', () => {
  // Accepted inputs. Each row is [raw batch value, expected normalized name]
  // when normalizeBatch is called with the 'medrxiv' server; every expected
  // name carries the "medRxiv_" prefix.
  it.each([
    // Numeric values: one-digit numbers are zero-padded, 100 keeps three digits
    [1, 'medRxiv_Batch_01'],
    [5, 'medRxiv_Batch_05'],
    [10, 'medRxiv_Batch_10'],
    [25, 'medRxiv_Batch_25'],
    [100, 'medRxiv_Batch_100'],
    // Digit-only strings, with and without a single leading zero
    ['1', 'medRxiv_Batch_01'],
    ['5', 'medRxiv_Batch_05'],
    ['10', 'medRxiv_Batch_10'],
    ['01', 'medRxiv_Batch_01'],
    ['05', 'medRxiv_Batch_05'],
    // "batch" prefix only, in any letter case
    ['batch1', 'medRxiv_Batch_01'],
    ['batch-1', 'medRxiv_Batch_01'],
    ['batch_1', 'medRxiv_Batch_01'],
    ['Batch1', 'medRxiv_Batch_01'],
    ['BATCH-1', 'medRxiv_Batch_01'],
    ['BATCH_1', 'medRxiv_Batch_01'],
    // Full "medrxiv" + "batch" prefix, in any letter case
    ['medrxiv-batch1', 'medRxiv_Batch_01'],
    ['medrxiv-batch-1', 'medRxiv_Batch_01'],
    ['medrxiv_batch_1', 'medRxiv_Batch_01'],
    ['MedRxiv-Batch1', 'medRxiv_Batch_01'],
    ['MEDRXIV-BATCH-1', 'medRxiv_Batch_01'],
    ['MEDRXIV_BATCH_1', 'medRxiv_Batch_01'],
    // Two leading zeros still collapse to the two-digit form
    ['001', 'medRxiv_Batch_01'],
    ['005', 'medRxiv_Batch_05'],
    ['010', 'medRxiv_Batch_10'],
    ['batch001', 'medRxiv_Batch_01'],
    ['batch-001', 'medRxiv_Batch_01'],
    ['batch_001', 'medRxiv_Batch_01'],
    ['medrxiv-batch001', 'medRxiv_Batch_01'],
    ['medrxiv-batch-001', 'medRxiv_Batch_01'],
    ['medrxiv_batch_001', 'medRxiv_Batch_01'],
    // Inputs that already carry a zero-padded number after the prefix
    ['batch-01', 'medRxiv_Batch_01'],
    ['batch_01', 'medRxiv_Batch_01'],
    ['Batch_01', 'medRxiv_Batch_01'],
    ['BATCH_01', 'medRxiv_Batch_01'],
    ['medrxiv-batch-01', 'medRxiv_Batch_01'],
    ['medrxiv_batch_01', 'medRxiv_Batch_01'],
    ['MedRxiv_Batch_01', 'medRxiv_Batch_01'],
    ['MEDRXIV_BATCH_01', 'medRxiv_Batch_01'],
  ])('should normalize "%s" to "%s" for medRxiv', (rawBatch, normalized) => {
    const actual = normalizeBatch(rawBatch, 'medrxiv');
    expect(actual).toBe(normalized);
  });

  // Rejected string inputs. Each row is [raw value, expected error message];
  // the message echoes the raw value exactly as it was passed in.
  it.each([
    ['0', 'Invalid batch format: 0. Expected a positive number or batch identifier.'],
    ['-1', 'Invalid batch format: -1. Expected a positive number or batch identifier.'],
    ['abc', 'Invalid batch format: abc. Expected a positive number or batch identifier.'],
    [
      'batch-abc',
      'Invalid batch format: batch-abc. Expected a positive number or batch identifier.',
    ],
    [
      'medrxiv-batch-abc',
      'Invalid batch format: medrxiv-batch-abc. Expected a positive number or batch identifier.',
    ],
    ['', 'Invalid batch format: . Expected a positive number or batch identifier.'],
    ['batch-', 'Invalid batch format: batch-. Expected a positive number or batch identifier.'],
    ['batch_', 'Invalid batch format: batch_. Expected a positive number or batch identifier.'],
    [
      'medrxiv-batch-',
      'Invalid batch format: medrxiv-batch-. Expected a positive number or batch identifier.',
    ],
    [
      'medrxiv_batch_',
      'Invalid batch format: medrxiv_batch_. Expected a positive number or batch identifier.',
    ],
  ])('should throw error for invalid input "%s" for medRxiv', (rawBatch, message) => {
    const attempt = () => normalizeBatch(rawBatch, 'medrxiv');
    expect(attempt).toThrow(message);
  });

  // Rejected numeric inputs: zero and negative numbers.
  it.each([
    [0, 'Invalid batch format: 0. Expected a positive number or batch identifier.'],
    [-1, 'Invalid batch format: -1. Expected a positive number or batch identifier.'],
    [-100, 'Invalid batch format: -100. Expected a positive number or batch identifier.'],
  ])('should throw error for invalid number input %d for medRxiv', (rawBatch, message) => {
    const attempt = () => normalizeBatch(rawBatch, 'medrxiv');
    expect(attempt).toThrow(message);
  });
});
describe('default behavior (no server specified)', () => {
  // Omitting the server argument must yield the un-prefixed (bioRxiv) names.
  it('should default to bioRxiv format when no server specified', () => {
    const fromNumber = normalizeBatch(1);
    expect(fromNumber).toBe('Batch_01');

    const fromDigitString = normalizeBatch('5');
    expect(fromDigitString).toBe('Batch_05');

    const fromPrefixedString = normalizeBatch('batch-10');
    expect(fromPrefixedString).toBe('Batch_10');
  });
});
describe('case insensitive server names', () => {
  // The server argument is matched without regard to letter case; the output
  // casing ("Batch_", "medRxiv_Batch_") stays the same whatever the input case.
  it('should handle case insensitive server names', () => {
    const upperBiorxiv = normalizeBatch(1, 'BIORXIV');
    expect(upperBiorxiv).toBe('Batch_01');

    const mixedBiorxiv = normalizeBatch(1, 'BioRxiv');
    expect(mixedBiorxiv).toBe('Batch_01');

    const upperMedrxiv = normalizeBatch(1, 'MEDRXIV');
    expect(upperMedrxiv).toBe('medRxiv_Batch_01');

    const mixedMedrxiv = normalizeBatch(1, 'MedRxiv');
    expect(mixedMedrxiv).toBe('medRxiv_Batch_01');
  });
});
});
describe('getFolderStructure', () => {
describe('with batch option', () => {
  // Every table row is [batch input, expected prefix, expected batch name].
  // The expected batch name is written out literally rather than rebuilt from
  // the input inside the test, so the assertion does not duplicate the
  // normalization logic it is meant to verify. The test titles interpolate
  // only the first column, so they are unaffected by the extra column.
  describe('bioRxiv server', () => {
    it.each([
      ['1', 'Back_Content/Batch_01/', 'Batch_01'],
      ['5', 'Back_Content/Batch_05/', 'Batch_05'],
      ['10', 'Back_Content/Batch_10/', 'Batch_10'],
      ['batch1', 'Back_Content/Batch_01/', 'Batch_01'],
      ['batch-1', 'Back_Content/Batch_01/', 'Batch_01'],
      ['batch_1', 'Back_Content/Batch_01/', 'Batch_01'],
      ['Batch_01', 'Back_Content/Batch_01/', 'Batch_01'],
      ['BATCH_01', 'Back_Content/Batch_01/', 'Batch_01'],
    ])(
      'should create Back_Content structure for batch "%s" on bioRxiv',
      (batch, expectedPrefix, expectedBatch) => {
        const result = getFolderStructure({ batch, server: 'biorxiv' });
        expect(result.type).toBe('back');
        expect(result.prefix).toBe(expectedPrefix);
        expect(result.batch).toBe(expectedBatch);
      },
    );
  });

  describe('medRxiv server', () => {
    it.each([
      ['1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['5', 'Back_Content/medRxiv_Batch_05/', 'medRxiv_Batch_05'],
      ['10', 'Back_Content/medRxiv_Batch_10/', 'medRxiv_Batch_10'],
      ['batch1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['batch-1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['batch_1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['medrxiv-batch1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['medrxiv-batch-1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['medrxiv_batch_1', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
      ['MedRxiv_Batch_01', 'Back_Content/medRxiv_Batch_01/', 'medRxiv_Batch_01'],
    ])(
      'should create Back_Content structure for batch "%s" on medRxiv',
      (batch, expectedPrefix, expectedBatch) => {
        const result = getFolderStructure({ batch, server: 'medrxiv' });
        expect(result.type).toBe('back');
        expect(result.prefix).toBe(expectedPrefix);
        expect(result.batch).toBe(expectedBatch);
      },
    );
  });

  // No server option is passed here; the expected values are the un-prefixed
  // (bioRxiv) ones.
  describe('default server (bioRxiv)', () => {
    it.each([
      ['1', 'Back_Content/Batch_01/', 'Batch_01'],
      ['5', 'Back_Content/Batch_05/', 'Batch_05'],
      ['10', 'Back_Content/Batch_10/', 'Batch_10'],
    ])(
      'should create Back_Content structure for batch "%s" with default server',
      (batch, expectedPrefix, expectedBatch) => {
        const result = getFolderStructure({ batch });
        expect(result.type).toBe('back');
        expect(result.prefix).toBe(expectedPrefix);
        expect(result.batch).toBe(expectedBatch);
      },
    );
  });
});
describe('with month option - Current Content period', () => {
  // Rows are [month input, expected prefix, expected batch]. These nine rows
  // are exercised, in this order, by both the bioRxiv and the medRxiv suite.
  const sharedMonthCases = [
    ['2019-01', 'Current_Content/January_2019/', 'January_2019'],
    ['2019-06', 'Current_Content/June_2019/', 'June_2019'],
    ['2019-12', 'Current_Content/December_2019/', 'December_2019'],
    ['2020-01', 'Current_Content/January_2020/', 'January_2020'],
    ['2024-01', 'Current_Content/January_2024/', 'January_2024'],
    ['2025-01', 'Current_Content/January_2025/', 'January_2025'],
    ['January_2019', 'Current_Content/January_2019/', 'January_2019'],
    ['June_2019', 'Current_Content/June_2019/', 'June_2019'],
    ['December_2019', 'Current_Content/December_2019/', 'December_2019'],
  ];

  describe('bioRxiv server', () => {
    it.each([
      ...sharedMonthCases,
      // Abbreviated and hyphen-separated month names, covered for bioRxiv only
      ['dec-2019', 'Current_Content/December_2019/', 'December_2019'],
      ['December-2019', 'Current_Content/December_2019/', 'December_2019'],
    ])(
      'should create Current_Content structure for month "%s" on bioRxiv',
      (month, expectedPrefix, expectedBatch) => {
        const { type, prefix, batch } = getFolderStructure({ month, server: 'biorxiv' });
        expect(type).toBe('current');
        expect(prefix).toBe(expectedPrefix);
        expect(batch).toBe(expectedBatch);
      },
    );
  });

  describe('medRxiv server', () => {
    it.each([...sharedMonthCases])(
      'should create Current_Content structure for month "%s" on medRxiv',
      (month, expectedPrefix, expectedBatch) => {
        const { type, prefix, batch } = getFolderStructure({ month, server: 'medrxiv' });
        expect(type).toBe('current');
        expect(prefix).toBe(expectedPrefix);
        expect(batch).toBe(expectedBatch);
      },
    );
  });

  // No server option is passed in this suite.
  describe('default server (bioRxiv)', () => {
    it.each([
      ['2019-01', 'Current_Content/January_2019/', 'January_2019'],
      ['2024-01', 'Current_Content/January_2024/', 'January_2024'],
    ])(
      'should create Current_Content structure for month "%s" with default server',
      (month, expectedPrefix, expectedBatch) => {
        const { type, prefix, batch } = getFolderStructure({ month });
        expect(type).toBe('current');
        expect(prefix).toBe(expectedPrefix);
        expect(batch).toBe(expectedBatch);
      },
    );
  });
});
describe('with month option - Back Content period', () => {
  // Rows are [month input, expected error message]. The message echoes the
  // month exactly as supplied and refers to the 'biorxiv list' command for
  // every server, medRxiv included.
  describe('bioRxiv server', () => {
    it.each([
      // YYYY-MM inputs
      [
        '2018-11',
        "Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2018-10',
        "Date 2018-10 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2018-01',
        "Date 2018-01 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2017-12',
        "Date 2017-12 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2015-06',
        "Date 2015-06 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      // Month_YYYY inputs
      [
        'November_2018',
        "Date November_2018 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        'October_2018',
        "Date October_2018 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
    ])(
      'should throw error for Back_Content period month "%s" on bioRxiv',
      (month, message) => {
        const attempt = () => getFolderStructure({ month, server: 'biorxiv' });
        expect(attempt).toThrow(message);
      },
    );
  });

  describe('medRxiv server', () => {
    it.each([
      [
        '2018-11',
        "Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2018-10',
        "Date 2018-10 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
      [
        '2018-01',
        "Date 2018-01 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
    ])(
      'should throw error for Back_Content period month "%s" on medRxiv',
      (month, message) => {
        const attempt = () => getFolderStructure({ month, server: 'medrxiv' });
        expect(attempt).toThrow(message);
      },
    );
  });

  // No server option is passed in this suite.
  describe('default server (bioRxiv)', () => {
    it.each([
      [
        '2018-11',
        "Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option. Available batches can be listed with 'biorxiv list' command.",
      ],
    ])(
      'should throw error for Back_Content period month "%s" with default server',
      (month, message) => {
        const attempt = () => getFolderStructure({ month });
        expect(attempt).toThrow(message);
      },
    );
  });
});
describe('with invalid inputs', () => {
  // Rows are [options object, message expected in the thrown error].
  it.each([
    // Neither month nor batch is usable: absent, undefined, or empty strings
    [{}, 'Either month or batch must be specified'],
    [{ month: undefined, batch: undefined }, 'Either month or batch must be specified'],
    [{ month: '', batch: '' }, 'Either month or batch must be specified'],
    // Month strings that match neither YYYY-MM nor Month_YYYY
    [
      { month: 'invalid-month' },
      'Invalid month format: invalid-month. Expected YYYY-MM or Month_YYYY format.',
    ],
    // Month numbers outside 01-12
    [
      { month: '2024-13' },
      'Invalid month format: 2024-13. Expected YYYY-MM or Month_YYYY format.',
    ],
    [
      { month: '2024-00' },
      'Invalid month format: 2024-00. Expected YYYY-MM or Month_YYYY format.',
    ],
    // Month_YYYY shape with an unknown month name
    [
      { month: 'InvalidMonth_2024' },
      'Invalid month format: InvalidMonth_2024. Expected YYYY-MM or Month_YYYY format.',
    ],
  ])('should throw error for invalid input %o', (options, message) => {
    const attempt = () => getFolderStructure(options);
    expect(attempt).toThrow(message);
  });
});
describe('edge cases', () => {
  // Boundary between the two periods, as asserted below: 2018-12 and later
  // resolve to Current_Content, while 2018-11 is rejected with a request to
  // specify a batch. The same three checks run for each server setting.
  describe('bioRxiv server', () => {
    it('should handle December 2018 as Current Content (cutoff date)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2018-12', server: 'biorxiv' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/December_2018/');
      expect(batch).toBe('December_2018');
    });

    it('should handle January 2019 as Current Content (after cutoff)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2019-01', server: 'biorxiv' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/January_2019/');
      expect(batch).toBe('January_2019');
    });

    it('should handle November 2018 as Back Content (before cutoff)', () => {
      const attempt = () => getFolderStructure({ month: '2018-11', server: 'biorxiv' });
      expect(attempt).toThrow(
        'Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option.',
      );
    });
  });

  describe('medRxiv server', () => {
    it('should handle December 2018 as Current Content (cutoff date)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2018-12', server: 'medrxiv' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/December_2018/');
      expect(batch).toBe('December_2018');
    });

    it('should handle January 2019 as Current Content (after cutoff)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2019-01', server: 'medrxiv' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/January_2019/');
      expect(batch).toBe('January_2019');
    });

    it('should handle November 2018 as Back Content (before cutoff)', () => {
      const attempt = () => getFolderStructure({ month: '2018-11', server: 'medrxiv' });
      expect(attempt).toThrow(
        'Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option.',
      );
    });
  });

  // No server option is passed in this suite.
  describe('default server (bioRxiv)', () => {
    it('should handle December 2018 as Current Content (cutoff date)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2018-12' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/December_2018/');
      expect(batch).toBe('December_2018');
    });

    it('should handle January 2019 as Current Content (after cutoff)', () => {
      const { type, prefix, batch } = getFolderStructure({ month: '2019-01' });
      expect(type).toBe('current');
      expect(prefix).toBe('Current_Content/January_2019/');
      expect(batch).toBe('January_2019');
    });

    it('should handle November 2018 as Back Content (before cutoff)', () => {
      const attempt = () => getFolderStructure({ month: '2018-11' });
      expect(attempt).toThrow(
        'Date 2018-11 is in the Back_Content period. Please specify a batch using --batch option.',
      );
    });
  });
});
describe('server validation', () => {
  // NOTE(review): despite the suite name, these tests check the month/batch
  // option rules, once without a server and once with an explicit one.
  it('should not allow both month and batch to be specified', () => {
    const withoutServer = () => getFolderStructure({ month: '2024-01', batch: '1' });
    expect(withoutServer).toThrow('Either month or batch must be specified, not both');

    const withMedrxiv = () =>
      getFolderStructure({ month: '2024-01', batch: '1', server: 'medrxiv' });
    expect(withMedrxiv).toThrow('Either month or batch must be specified, not both');
  });

  it('should require either month or batch', () => {
    const onlyBiorxivServer = () => getFolderStructure({ server: 'biorxiv' });
    expect(onlyBiorxivServer).toThrow('Either month or batch must be specified');

    const onlyMedrxivServer = () => getFolderStructure({ server: 'medrxiv' });
    expect(onlyMedrxivServer).toThrow('Either month or batch must be specified');
  });
});
});
});