UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

334 lines (317 loc) 9.91 kB
"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.check005 = check005;
exports.check008 = check008;
exports.compare001 = compare001;
exports.compare005 = compare005;
exports.get001 = get001;
exports.get005 = get005;
exports.get008 = get008;
var _debug = _interopRequireDefault(require("debug"));
var _moment = _interopRequireDefault(require("moment"));
var _compareUtils = require("./compareFunctions/compareUtils");
var _utils = require("./utils");
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
const debug = (0, _debug.default)('@natlibfi/melinda-record-match-validator:controlFields');
const debugDev = debug.extend('dev');
//const debugData = debug.extend('data');

// Collect:

/**
 * Collects the value of the first 001 field and tells whether the first 003
 * field marks it as a Melinda id.
 *
 * @param {object} record - Record object; `record.get(tag)` must return an array of fields with a `value` property.
 * @returns {{value: (string|undefined), isMelindaId: boolean}} The 001 value (undefined when the record has no 001)
 *   and a flag that is true only when the 003 value is exactly 'FI-MELINDA'.
 */
function get001(record) {
  const [f001Value] = record.get('001').map(field => field.value);
  const [f003Value] = record.get('003').map(field => field.value);
  const isMelindaId = f003Value === 'FI-MELINDA';
  debugDev('Record f001 value: %o', f001Value);
  debugDev('Record f001 value is melinda id: %o', isMelindaId);
  return {
    value: f001Value,
    isMelindaId
  };
}

/**
 * Collects the value of the first 005 field and reformats it.
 *
 * The value is parsed strictly as 'YYYYMMDDHHmmss.S' and re-emitted as
 * 'YYYY-MM-DDTHH:mm:ss'.
 * NOTE(review): when the field is missing or does not match the pattern the
 * result is whatever moment's format() yields for an invalid date
 * (presumably the string 'Invalid date') - confirm against moment docs.
 *
 * @param {object} record - Record object; `record.get(tag)` must return an array of fields with a `value` property.
 * @returns {string} The formatted last modification time.
 */
function get005(record) {
  const [f005Value] = record.get('005').map(field => field.value);
  const time = (0, _moment.default)(f005Value, ['YYYYMMDDHHmmss.S'], true).format('YYYY-MM-DDTHH:mm:ss');
  debugDev('Last modification time: %o', time);
  return time;
}

// 008-all materials 06 - Type of date/Publication status
// https://www.loc.gov/marc/bibliographic/bd008a.html
const publicationStatusHash = {
  'b': 'No dates given; B.C. date involved',
  'c': 'Continuing resource currently published',
  'd': 'Continuing resource ceased publication',
  'e': 'Detailed date',
  'i': 'Inclusive dates of collection',
  'k': 'Range of year of bulk of collection',
  'm': 'Multiple dates',
  'n': 'Dates unknown',
  'p': 'Date of distribution/release/issue and production/recording session when different',
  'q': 'Questionable date',
  'r': 'Reprint/reissue date and original date',
  's': 'Single known/probable date',
  't': 'Publication date and copyright date',
  'u': 'Continuing resource status unknown',
  '|': 'No attempt to code'
};

// 008-all materials 39 - Cataloging source
// https://www.loc.gov/marc/bibliographic/bd008a.html
const catalogingSourceHash = {
  ' ': 'National bibliographical agency',
  'c': 'Cooperative cataloging program',
  'd': 'Other',
  'u': 'Unknown',
  '|': 'No attempt to code'
};

// 008-BK/CF/MU/SE/MX 23 - Form of item
// 008-MP/VM 29 - Form of item
// https://www.loc.gov/marc/bibliographic/bd008.html
// NOTE(review): 'expect' in the first label looks like a typo for 'except'.
// It is left unchanged because the label is returned to callers at runtime.
const formOfItemHash = {
  ' ': 'None of the following, expect for CF unknown or not specified',
  'a': 'Microfilm',
  'b': 'Microfiche',
  'c': 'Microopaque',
  'd': 'Large print',
  'f': 'Braille',
  'o': 'Online',
  'q': 'Direct electronic',
  'r': 'Regular print reproduction',
  's': 'Electronic',
  '|': 'No attempt to code'
};

/**
 * Collects publication status (008/06), cataloging source (008/39) and form
 * of item (008/29 when `record.isMP()` or `record.isVM()` is truthy,
 * otherwise 008/23) from the first 008 field.
 *
 * A missing 008 field, or a code that is not a key of the corresponding
 * lookup table, is reported with the code '|'.
 *
 * @param {object} record - Record object providing `get(tag)`, `isMP()` and `isVM()`.
 * @returns {{catalogingSource: {level: string, code: string},
 *            publicationStatus: {level: string, code: string},
 *            formOfItem: {form: string, code: string}}} Code and label for each position.
 */
function get008(record) {
  const [f008Value] = record.get('008').map(field => field.value);
  const publicationStatus = f008Value ? f008Value[6] : '|'; // eslint-disable-line prefer-destructuring
  const catalogingSource = f008Value ? f008Value[39] : '|'; // eslint-disable-line prefer-destructuring
  const formOfItem = getFormOfItem();

  const result = {
    catalogingSource: mapCatalogingSource(catalogingSource),
    publicationStatus: mapPublicationStatus(publicationStatus),
    formOfItem: mapFormOfItem(formOfItem)
  };
  return result;

  // Picks the form-of-item character from the position that applies to the record.
  function getFormOfItem() {
    if (!f008Value) {
      return '|';
    }
    if (record.isMP() || record.isVM()) {
      return f008Value[29];
    }
    return f008Value[23];
  }

  // Each mapper falls back to '|' when the code is not a key of its table.
  function mapPublicationStatus(publicationStatus) {
    const tmp = publicationStatus in publicationStatusHash ? publicationStatus : '|';
    return {
      level: publicationStatusHash[tmp],
      code: tmp
    };
  }

  function mapCatalogingSource(catalogingSource) {
    const tmp = catalogingSource in catalogingSourceHash ? catalogingSource : '|';
    return {
      level: catalogingSourceHash[tmp],
      code: tmp
    };
  }

  function mapFormOfItem(formOfItemCode) {
    const tmp = formOfItemCode in formOfItemHash ? formOfItemCode : '|';
    // NOTE(review): 'FOO' looks like a leftover placeholder tag in this debug line.
    (0, _utils.nvdebug)(`FOO ${tmp}`);
    return {
      form: formOfItemHash[tmp],
      code: tmp
    };
  }
}

// Compare

/**
 * Compares the collected 001 data of two records.
 *
 * @param {object} recordValuesA - Collected values of record A; reads `recordValuesA['001']`.
 * @param {object} recordValuesB - Collected values of record B; reads `recordValuesB['001']`.
 * @returns {{value: *, isMelindaId: (boolean|string)}} `value` is the result of
 *   `compareValueContent` on the two 001 values; `isMelindaId` is true when both
 *   are Melinda ids, 'A' or 'B' when only that record is, and false when neither is.
 */
function compare001(recordValuesA, recordValuesB) {
  const f001A = recordValuesA['001'];
  const f001B = recordValuesB['001'];
  return {
    'value': (0, _compareUtils.compareValueContent)(f001A.value, f001B.value),
    'isMelindaId': compareIsMelindaId()
  };

  function compareIsMelindaId() {
    debugDev('%o vs %o', f001A, f001B);
    if (f001A.isMelindaId && f001B.isMelindaId) {
      debugDev('Both are Melinda ids');
      return true;
    }
    if (f001A.isMelindaId && !f001B.isMelindaId) {
      debugDev('Only A is Melinda id');
      return 'A';
    }
    if (!f001A.isMelindaId && f001B.isMelindaId) {
      debugDev('Only B is Melinda id');
      return 'B';
    }
    debugDev('Both are non Melinda ids');
    return false;
  }
}

/**
 * Compares the collected 005 (last modification time) values of two records.
 *
 * @param {object} recordValuesA - Collected values of record A; reads `recordValuesA['005']`.
 * @param {object} recordValuesB - Collected values of record B; reads `recordValuesB['005']`.
 * @returns {(boolean|string)} true when the times are the same (or neither is a
 *   valid time), 'A' when A is more recent or is the only valid one, 'B' otherwise.
 */
function compare005(recordValuesA, recordValuesB) {
  const f005A = recordValuesA['005'];
  const f005B = recordValuesB['005'];
  return ratef005();

  function ratef005() {
    debugDev('%o vs %o', f005A, f005B);
    const timeA = (0, _moment.default)(f005A);
    const timeB = (0, _moment.default)(f005B);
    const validA = timeA.isValid();
    const validB = timeB.isValid();

    // isSame()/isAfter() are both false when either side is invalid, which
    // would make every such pair fall through to 'B'. Decide those cases
    // explicitly so that an unparsable timestamp never wins over a valid one.
    if (!validA || !validB) {
      if (validA) {
        debugDev('Only A has a valid last modified time');
        return 'A';
      }
      if (validB) {
        debugDev('Only B has a valid last modified time');
        return 'B';
      }
      debugDev('Neither has a valid last modified time');
      return true;
    }

    if (timeA.isSame(timeB)) {
      debugDev('Both have same last modified time');
      return true;
    }
    if (timeA.isAfter(timeB)) {
      debugDev('A has been modified more recently');
      return 'A';
    }
    debugDev('B has been modified more recently');
    return 'B';
  }
}

/*
export function compare008(recordValuesA, recordValuesB) {
  const f008A = recordValuesA['008'];
  const f008B = recordValuesB['008'];
  return innerCompare008(f008A, f008B);
}
*/

// DEVELOP: we do not do any comparison based on 008/39 here - is catalogingSource used in some other comparison task?
/**
 * Decides whether two collected 008 summaries allow a merge and, if so,
 * which record has the preferable publication status code.
 *
 * @param {object} f008A - Collected 008 data of record A; reads `formOfItem.code` and `publicationStatus.code`.
 * @param {object} f008B - Collected 008 data of record B; same shape as `f008A`.
 * @returns {(boolean|string)} false when the forms of item must not be paired,
 *   otherwise true (no preference), 'A' or 'B' from the publication status comparison.
 */
function innerCompare008(f008A, f008B) {
  (0, _utils.nvdebug)(`A 008: ${JSON.stringify(f008A)}`);
  (0, _utils.nvdebug)(`B 008: ${JSON.stringify(f008B)}`);

  const formsCanBePaired = isPairableFormOfItem(f008A.formOfItem.code, f008B.formOfItem.code);
  if (!formsCanBePaired) {
    return false;
  }
  return mp06Comparison(f008A.publicationStatus.code, f008B.publicationStatus.code);

  // Keeps online ('o') and (local) direct electronic ('q') resources from merging.
  // (Other conflicting value pairs exist as well, but this is the one seen as
  // most likely to cause merges that should not happen.)
  function isPairableFormOfItem(formOfItemA, formOfItemB) {
    const onlineVsDirect = formOfItemA === 'o' && formOfItemB === 'q';
    const directVsOnline = formOfItemA === 'q' && formOfItemB === 'o';
    return !(onlineVsDirect || directVsOnline);
  }

  // Ranks two publication status codes: true = no preference, 'A'/'B' = that side is preferred.
  function mp06Comparison(mp06A, mp06B) {
    if (mp06A === mp06B) {
      return true;
    }

    // 'b' (before Christ) is always wrong in our domain; once it is ruled
    // out, '|' is the ultimate loser. Whichever side carries the code loses.
    for (const losingCode of ['b', '|']) {
      if (mp06A === losingCode) {
        return 'B';
      }
      if (mp06B === losingCode) {
        return 'A';
      }
    }

    // d < (c or u) < |
    const continuingVerdict = compareContinuingResources(mp06A, mp06B);
    if (continuingVerdict !== false) {
      return continuingVerdict;
    }

    // One is a reprint and the other one is not. Abort!
    /*
    if (mp06A === 'r' || mp06B === 'r') {
      return false;
    }
    */

    const scoreA = scoreSinglePart(mp06A);
    const scoreB = scoreSinglePart(mp06B);
    const eitherUnscored = scoreA < 0 || scoreB < 0;
    if (eitherUnscored || scoreA === scoreB) {
      // Other rules?
      return true;
    }
    return scoreA > scoreB ? 'A' : 'B';
  }

  function isUnknownOrContinuingResource(mp06) {
    const continuingOrUncoded = ['|', 'c', 'd', 'u'];
    return continuingOrUncoded.includes(mp06);
  }

  // Returns false when the pair is not made of continuing-resource/uncoded
  // codes; otherwise 'A', 'B' or true (no preference).
  function compareContinuingResources(mp06A, mp06B) {
    // There should not be pairs here
    const bothApply = isUnknownOrContinuingResource(mp06A) && isUnknownOrContinuingResource(mp06B);
    if (!bothApply) {
      return false;
    }
    // d < c or u < |
    if (mp06A === 'd' || mp06B === '|') {
      return 'A';
    }
    if (mp06B === 'd' || mp06A === '|') {
      return 'B';
    }
    // One is 'c' and the other one is 'u'. Neither is known to be better than the other.
    return true;
  }
}

/**
 * Scores a publication status code for the non-continuing-resource comparison.
 *
 * @param {string} mp06 - Publication status code.
 * @returns {number} 4 for 'e', 'r', 't'; 3 for 'p', 's'; 2 for 'q'
 *   (questionable date); 1 for 'n' (unknown date); -1 for anything else.
 */
function scoreSinglePart(mp06) {
  switch (mp06) {
    case 'e':
    case 'r':
    case 't':
      return 4;
    case 'p':
    case 's':
      return 3;
    case 'q':
      return 2;
    case 'n':
      return 1;
    default:
      return -1;
  }
}

// check (collect&compare):

/**
 * Collects the 005 values of both records and compares them by year only.
 *
 * Theoretically the record with the newer timestamp is the better one.
 * However, load-fixes and similar operations make the exact timestamp
 * unreliable, so only the year is compared.
 *
 * @param {{record1: object, record2: object}} records - The two records to check.
 * @returns {(boolean|string)} 'A' when record1 has the later year, 'B' when
 *   record2 has, true otherwise.
 */
function check005({ record1, record2 }) {
  const timestampA = get005(record1);
  const timestampB = get005(record2);
  const yearA = yearOf(timestampA);
  const yearB = yearOf(timestampB);

  if (yearA > yearB) {
    return 'A';
  }
  if (yearB > yearA) {
    return 'B';
  }
  return true;

  // YYYY is approximate enough
  function yearOf(timestamp) {
    return Number.parseInt(timestamp.slice(0, 4), 10);
  }
}

/**
 * Collects the 008 summaries of both records and compares them.
 *
 * @param {{record1: object, record2: object}} records - The two records to check.
 * @returns {(boolean|string)} The result of `innerCompare008` for the two summaries.
 */
function check008({ record1, record2 }) {
  return innerCompare008(get008(record1), get008(record2));
}
//# sourceMappingURL=controlFields.js.map