UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

220 lines (183 loc) 5.55 kB
import createDebugLogger from 'debug';
import moment from 'moment';
import {compareValueContent} from './compareUtils.js';
import {get005, get008} from '../collectFunctions/collectControlFields.js';
import {nvdebug} from '../utils.js';

// NOTE(review): the namespace below says "collectFunctions:collectControlFields" although
// the functions in this module are comparisons - confirm whether that is intentional.
const debug = createDebugLogger('@natlibfi/melinda-record-match-validator:collectFunctions:collectControlFields');
const debugDev = debug.extend('dev');
//const debugData = debug.extend('data');

// Compare

/**
 * Compare the collected 001 values of two records.
 *
 * @param {Object} recordValuesA - collected values of record A; its '001' entry is read for `value` and `isMelindaId`
 * @param {Object} recordValuesB - collected values of record B, same shape as A
 * @returns {{value: *, isMelindaId: (boolean|string)}} `value` is whatever compareValueContent returns for
 *   the two 001 values; `isMelindaId` is true when both are Melinda ids, 'A' or 'B' when only that
 *   record's id is a Melinda id, and false when neither is.
 */
export function compare001(recordValuesA, recordValuesB) {
  const fieldA = recordValuesA['001'];
  const fieldB = recordValuesB['001'];

  return {
    'value': compareValueContent(fieldA.value, fieldB.value),
    'isMelindaId': describeMelindaIdOwnership()
  };

  // Tells which of the two 001 fields (both, one, or neither) is flagged as a Melinda id.
  function describeMelindaIdOwnership() {
    debugDev('%o vs %o', fieldA, fieldB);

    const aIsMelindaId = Boolean(fieldA.isMelindaId);
    const bIsMelindaId = Boolean(fieldB.isMelindaId);

    if (aIsMelindaId) {
      if (bIsMelindaId) {
        debugDev('Both are Melinda ids');
        return true;
      }
      debugDev('Only A is Melinda id');
      return 'A';
    }

    if (bIsMelindaId) {
      debugDev('Only B is Melinda id');
      return 'B';
    }

    debugDev('Both are non Melinda ids');
    return false;
  }
}

/**
 * Compare the collected 005 (last modification time) values of two records.
 *
 * @param {Object} recordValuesA - collected values of record A; its '005' entry is handed to moment()
 * @param {Object} recordValuesB - collected values of record B; its '005' entry is used as the comparison target
 * @returns {(boolean|string)} true when moment considers the two values the same, 'A' when A's value
 *   is after B's, otherwise 'B'.
 */
export function compare005(recordValuesA, recordValuesB) {
  const modifiedA = recordValuesA['005'];
  const modifiedB = recordValuesB['005'];

  debugDev('%o vs %o', modifiedA, modifiedB);

  const momentA = moment(modifiedA);

  if (momentA.isSame(modifiedB)) {
    debugDev('Both have same last modified time');
    return true;
  }

  if (momentA.isAfter(modifiedB)) {
    debugDev('A has been modified more recently');
    return 'A';
  }

  // Everything else (including values moment cannot order) falls through to B.
  debugDev('B has been modified more recently');
  return 'B';
}

/*
export function compare008(recordValuesA, recordValuesB) {
  const f008A = recordValuesA['008'];
  const f008B = recordValuesB['008'];
  return innerCompare008(f008A, f008B);
}
*/

// DEVELOP: we do not do any comparison based on 008/39 here - is cataloguingSource used in some other comparison task?
/**
 * Compare two collected 008 value objects and decide whether the records may be paired
 * and, if so, which one has the preferable publication status.
 *
 * @param {Object} f008A - collected 008 values of record A; `formOfItem.code` and `publicationStatus.code` are read
 * @param {Object} f008B - collected 008 values of record B, same shape as A
 * @returns {(boolean|string)} false when the forms of item must not be paired; otherwise
 *   'A' or 'B' when that record's publication status is preferred, or true when no preference is found.
 */
function innerCompare008(f008A, f008B) {
  nvdebug(`A 008: ${JSON.stringify(f008A)}`);
  nvdebug(`B 008: ${JSON.stringify(f008B)}`);

  if (!isPairableFormOfItem(f008A.formOfItem.code, f008B.formOfItem.code)) {
    return false;
  }

  // mp06Comparison only ever yields true, 'A' or 'B', so its result is the final answer.
  return mp06Comparison(f008A.publicationStatus.code, f008B.publicationStatus.code);

  // Returns false only for the online ('o') vs direct electronic ('q') combination.
  function isPairableFormOfItem(formOfItemA, formOfItemB) {
    // Prevent online and (local) direct electronic resources from merging:
    // (There are other conflicting values as well, but this is the case I see most likely
    // to cause merges that should not happen.)
    const onlineVsDirect = formOfItemA === 'o' && formOfItemB === 'q';
    const directVsOnline = formOfItemA === 'q' && formOfItemB === 'o';
    return !(onlineVsDirect || directVsOnline);
  }

  // Ranks two publication status codes: true = no preference, 'A'/'B' = the preferred record.
  function mp06Comparison(mp06A, mp06B) {
    if (mp06A === mp06B) {
      return true;
    }

    // 'b' (before Christ) is always wrong in our domain
    if (mp06A === 'b') {
      return 'B';
    }
    if (mp06B === 'b') {
      return 'A';
    }

    // After handling 'b', '|' is the ultimate loser:
    if (mp06A === '|') {
      return 'B';
    }
    if (mp06B === '|') {
      return 'A';
    }

    // d < (c or u) < |
    const continuingResource = compareContinuingResources(mp06A, mp06B);
    if (continuingResource !== false) {
      return continuingResource;
    }

    // Disabled rule: one is a reprint and the other one is not. Abort!
    /*
    if (mp06A === 'r' || mp06B === 'r') {
      return false;
    }
    */

    const scoreA = scoreSinglePart(mp06A);
    const scoreB = scoreSinglePart(mp06B);
    // Scores are only comparable when both codes are known to scoreSinglePart.
    if (scoreA > -1 && scoreB > -1) {
      if (scoreA > scoreB) {
        return 'A';
      }
      if (scoreA < scoreB) {
        return 'B';
      }
    }

    // Other rules?
    return true;
  }

  function isUnknownOrContinuingResource(mp06) {
    return ['|', 'c', 'd', 'u'].includes(mp06);
  }

  // Returns false when the pair is not two unknown/continuing-resource codes, otherwise
  // 'A'/'B' for the preferred record, or true when there is no preference ('c' vs 'u').
  function compareContinuingResources(mp06A, mp06B) {
    // There should not be pairs here
    if (!isUnknownOrContinuingResource(mp06A) || !isUnknownOrContinuingResource(mp06B)) {
      return false;
    }

    // d < c or u < |
    if (mp06A === 'd' || mp06B === '|') {
      return 'A';
    }
    if (mp06B === 'd' || mp06A === '|') {
      return 'B';
    }

    // One is 'c' and the other one is 'u'. I'm not sure is one better than the other...
    return true;
  }
}

/**
 * Score a publication status code of a single-part resource; a higher score is preferred.
 * Code meanings below follow the MARC 21 008/06 code list (assumes mp06 is that position).
 *
 * @param {string} mp06 - publication status code
 * @returns {number} 4, 3, 2 or 1 for the recognised codes, -1 for anything else
 */
function scoreSinglePart(mp06) {
  switch (mp06) {
    case 'e': // detailed date
    case 'r': // reprint/reissue date and original date
    case 't': // publication date and copyright date
      return 4;
    case 'p': // date of distribution/release and production when different
    case 's': // single known/probable date
      return 3;
    case 'q': // questionable date
      return 2;
    case 'n': // unknown date
      return 1;
    default:
      return -1;
  }
}

// check (collect&compare):

/**
 * Collect the 005 values of both records and prefer the one modified in a later year.
 *
 * @param {Object} params
 * @param {Object} params.record1 - record A, passed to get005
 * @param {Object} params.record2 - record B, passed to get005
 * @returns {(boolean|string)} 'A' or 'B' for the record with the later year; true when the
 *   years are equal or when either year cannot be parsed (NaN never compares greater).
 */
export function check005({record1, record2}) {
  const data1 = get005(record1);
  const data2 = get005(record2);

  // Theoretically the record with newer timestamp is the better one.
  // However, we have n+1 load-fixes etc reasons why this is not reliable, so year is good enough for me.
  const year1 = getYear(data1);
  const year2 = getYear(data2);

  if (year1 > year2) {
    return 'A';
  }
  if (year2 > year1) {
    return 'B';
  }
  return true;

  function getYear(value) {
    // YYYY is approximate enough
    return Number.parseInt(value.slice(0, 4), 10);
  }
}

/**
 * Collect the 008 values of both records and compare them with innerCompare008.
 *
 * @param {Object} params
 * @param {Object} params.record1 - record A, passed to get008
 * @param {Object} params.record2 - record B, passed to get008
 * @returns {(boolean|string)} the result of innerCompare008: false, true, 'A' or 'B'
 */
export function check008({record1, record2}) {
  //nvdebug(`CHECK 008`);
  const data1 = get008(record1);
  const data2 = get008(record2);
  return innerCompare008(data1, data2);
}