UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

152 lines (116 loc) 5.13 kB
import moment from 'moment';
import {hasFields} from './collectFunctions/collectUtils';
import createDebugLogger from 'debug';

const debug = createDebugLogger('@natlibfi/melinda-record-match-validator:CAT');
const debugDev = debug.extend('dev');
//const debugData = debug.extend('data');

/**
 * Collect the CAT fields of a record into a comparable structure.
 *
 * The CAT entries are taken in reverse order of what `hasFields` returns, so
 * the last CAT is treated as the latest one.
 * NOTE(review): presumably `hasFields` returns the record's CAT fields in
 * record order, mapped through `catToJSON`, and `[]` when there are none -
 * confirm against collectUtils.
 *
 * @param {Object} record - record whose CAT fields are read
 * @returns {{latest: {cataloger: string, time: string}, otherCats: Array<{cataloger: string, time: string}>, noCats: (boolean|undefined)}}
 *   `latest` is the newest CAT, `otherCats` the rest (newest first). When the
 *   record has no CATs, `latest` is a placeholder with the string 'undefined'
 *   as cataloger and time, `otherCats` is empty and `noCats` is `true`.
 */
export function getCAT(record) {
  // if not fields []
  const CATs = hasFields('CAT', record, catToJSON);
  // Reverse a copy: Array.prototype.reverse() works in place and would
  // otherwise mutate the array handed back by hasFields.
  const [latest, ...otherCats] = [...CATs].reverse();

  if (latest === undefined) {
    return {latest: {cataloger: 'undefined', time: 'undefined'}, otherCats: [], noCats: true};
  }

  debugDev('Latest CAT: %o', latest);
  debugDev('Other CATs: %o', otherCats);

  return {latest, otherCats};

  /**
   * Convert one CAT field to `{cataloger, time}`.
   *
   * Uses the first $a as cataloger (the string 'undefined' when missing) and
   * the first $c (date) and first $h (clock) as the timestamp. The timestamp
   * is parsed strictly as YYYYMMDDHHmm; a missing or malformed date/clock
   * yields whatever moment formats an invalid date as.
   */
  function catToJSON(cat) {
    const [cataloger = 'undefined'] = subfieldValues('a');
    // Take the first value explicitly instead of adding two arrays together,
    // which only produced a parseable string when each subfield occurred once.
    const [catDate = ''] = subfieldValues('c');
    const [catClock = ''] = subfieldValues('h');
    const time = moment(`${catDate}${catClock}`, ['YYYYMMDDHHmm'], true).format('YYYY-MM-DDTHH:mm:ss');

    return {cataloger, time};

    function subfieldValues(code) {
      return cat.subfields.filter(sub => sub.code === code).map(sub => sub.value);
    }
  }
}

/**
 * Compare the previously collected CAT values of two records.
 *
 * @param {Object} recordValuesA - collected values of record A; its `CAT` property is used
 * @param {Object} recordValuesB - collected values of record B; its `CAT` property is used
 * @returns {(boolean|string)} `true` when there is no preference, otherwise 'A' or 'B'
 */
export function compareCAT(recordValuesA, recordValuesB) {
  const CATsA = recordValuesA.CAT;
  const CATsB = recordValuesB.CAT;

  return innerCompareCat(CATsA, CATsB);
}

/**
 * Decide which record is preferred based on CAT history.
 *
 * The comparison never rejects a merge: it returns `true` when no preference
 * can be established, and 'A' or 'B' to name the preferred record.
 *
 * @param {Object} CATsA - result of getCAT for record A
 * @param {Object} CATsB - result of getCAT for record B
 * @returns {(boolean|string)} `true`, 'A' or 'B'
 */
// eslint-disable-next-line complexity, max-statements
function innerCompareCat(CATsA, CATsB) {
  debugDev('Comparing CATs: A: %o vs B: %o', CATsA, CATsB);

  // No need for analysing CATs if neither of records has CATs
  if (CATsA.noCats && CATsB.noCats) {
    return true;
  }

  // The latest CAT is same -> merging ok, no preference
  const hasSameLatestCAT = CATsA.latest.cataloger === CATsB.latest.cataloger && CATsA.latest.time === CATsB.latest.time;
  debugDev('Has same latest CAT: %o', hasSameLatestCAT);

  if (hasSameLatestCAT) {
    return true;
  }

  debugDev(`-- Comparing AtoB`);
  const resultA = analyzeCATs(CATsA, CATsB);
  debugDev(`-- Comparing BtoA`);
  const resultB = analyzeCATs(CATsB, CATsA);

  // Preference for record that has extra CATs after common CAT history
  if (resultA.isAheadOfOther && !resultB.isAheadOfOther) {
    return 'A';
  }

  if (!resultA.isAheadOfOther && resultB.isAheadOfOther) {
    return 'B';
  }

  // If other record has no CATs, preference for record that has non-automatic CATs
  if (CATsA.noCats && resultB.nonCompCats.length > 0) {
    return 'B';
  }

  if (CATsB.noCats && resultA.nonCompCats.length > 0) {
    return 'A';
  }

  // There is a common CAT somewhere in history
  if (resultA.commonOtherCats.length > 0) {
    // Preference for record that has extra CATs after common CAT.
    // updatesAfterCommonCAT is a count (an index into otherCats), so it is
    // compared as a number; reading `.length` from it always gave undefined
    // and made these two branches unreachable.
    if (resultB.updatesAfterCommonCAT === 0 && resultA.updatesAfterCommonCAT > 0) {
      return 'A';
    }

    if (resultA.updatesAfterCommonCAT === 0 && resultB.updatesAfterCommonCAT > 0) {
      return 'B';
    }

    // Preference for record that has non-automatic CATs
    if (resultA.nonCompCats.length > 0 && resultB.nonCompCats.length === 0) {
      return 'A';
    }

    if (resultB.nonCompCats.length > 0 && resultA.nonCompCats.length === 0) {
      return 'B';
    }

    // Both have X amount of uniq updates after common
    return true; // CAT-comparison is for preference only
  }

  return true; // CAT-comparison is for preference only

  /**
   * Analyze the CATs of one record (CATsCompareTo) against the other's.
   *
   * @returns {{isAheadOfOther: boolean, commonOtherCats: Array, updatesAfterCommonCAT: number, nonCompCats: Array}}
   *   - isAheadOfOther: the other record's latest CAT is found among this record's older CATs
   *   - commonOtherCats: this record's older CATs that also appear among the other record's older CATs
   *   - updatesAfterCommonCAT: index of the first common CAT in this record's
   *     otherCats (newest first), i.e. how many older CATs precede it; -1 when
   *     there is no common CAT
   *   - nonCompCats: CATs whose cataloger does not match the automatic-cataloger pattern
   */
  function analyzeCATs(CATsCompareTo, CATsToCompare) {
    // Look for identical CATs:
    const isAheadOfOther = compareIfArrayContainsCat(CATsToCompare.latest, CATsCompareTo.otherCats);
    debugDev('Is ahead of the other: %o', isAheadOfOther);

    const commonOtherCats = CATsCompareTo.otherCats.filter(cat => compareIfArrayContainsCat(cat, CATsToCompare.otherCats));
    debugDev('Contains common CATs: %o', commonOtherCats);

    // filter() keeps the original object references, so indexOf finds the element by identity
    const updatesAfterCommonCAT = CATsCompareTo.otherCats.indexOf(commonOtherCats[0]);
    debugDev('Contains %o CATs after common CAT', updatesAfterCommonCAT);

    const nonCompCats = catsContainNonImpOrLoad(CATsCompareTo.latest, CATsCompareTo.otherCats);
    debugDev('CATs contains NON "IMP-" or "LOAD-" or "CONV-" CATs: %o', nonCompCats);

    return {
      isAheadOfOther,
      commonOtherCats,
      updatesAfterCommonCAT,
      nonCompCats
    };
  }

  /**
   * Return the CATs (latest and older) whose cataloger is defined and does
   * not match the pattern below (LOAD-/LOAD_/IMP-/IMP_/CONV-/REM- prefixes,
   * FENNI-KV, or the placeholder string 'undefined').
   */
  function catsContainNonImpOrLoad(latest, otherCats) {
    const automaticCatalogerRegex = /^LOAD-\w*|^LOAD_\w*|^IMP-\w*|^IMP_\w*|^CONV-\w*|^REM-\w*|^FENNI-KV$|^undefined$/u;

    return [latest, ...otherCats].filter(cat => cat.cataloger !== undefined && !automaticCatalogerRegex.test(cat.cataloger));
  }

  /**
   * True when catArray has a CAT with the same cataloger and time as
   * catToCompare. A CAT with a null cataloger on either side never matches.
   */
  function compareIfArrayContainsCat(catToCompare, catArray) {
    return catArray.some(cat => {
      if (cat.cataloger === null || catToCompare.cataloger === null) {
        return false;
      }

      return catToCompare.cataloger === cat.cataloger && catToCompare.time === cat.time;
    });
  }
}

/**
 * Collect the CATs of both records and compare them.
 *
 * @param {Object} params
 * @param {Object} params.record1 - record treated as 'A'
 * @param {Object} params.record2 - record treated as 'B'
 * @returns {(boolean|string)} `true` when there is no preference, otherwise 'A' or 'B'
 */
export function checkCAT({record1, record2}) {
  const data1 = getCAT(record1);
  const data2 = getCAT(record2);

  return innerCompareCat(data1, data2);
}