UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

316 lines (262 loc) 11.6 kB
import createDebugLogger from 'debug';
import {nvdebug} from './utils';

const debug = createDebugLogger('@natlibfi/melinda-record-match-validator:leader');
const debugDev = debug.extend('dev');
//const debugData = debug.extend('data');

// Descriptions of type of record, bibliographical level and encoding level are taken from official specs:
// https://www.loc.gov/marc/bibliographic/bdleader.html

// LDR/06
const typeOfRecordHash = {
  'a': 'Language material',
  'c': 'Notated music',
  'd': 'Manuscript notated music',
  'e': 'Cartographic material',
  'f': 'Manuscript cartographic material',
  'g': 'Projected medium',
  'i': 'Nonmusical sound recording',
  'j': 'Musical sound recording',
  'k': 'Two-dimensional nonprojectable graphic',
  'm': 'Computer file',
  'o': 'Kit',
  'p': 'Mixed materials',
  'r': 'Three-dimensional artifact or naturally occurring object',
  't': 'Manuscript language material'
};

// LDR/07
const bibliographicLevelHash = {
  'a': 'Monographic component part',
  'b': 'Serial component part',
  'c': 'Collection',
  'd': 'Subunit',
  'i': 'Integrating resource',
  'm': 'Monograph/Item',
  's': 'Serial'
};

// LDR/17
// Note: if we have '2' for 'Koneellisesti tuotettu tietue' it should have lower preference than here
const encodingLevelHash = {
  ' ': 'Full level',
  '1': 'Full level, material not examined',
  '2': 'Less-than-full level, material not examined',
  '3': 'Abbreviated level',
  '4': 'Core level',
  '5': 'Partial (preliminary) level',
  '7': 'Minimal level',
  '8': 'Prepublication level',
  'u': 'Unknown',
  'z': 'Not applicable'
};

/**
 * Map a LDR/06 code to a {level, code} pair.
 * @param {string} typeOfRecord - single character code from the leader
 * @returns {{level: string, code: string}} description and the original code
 * @throws {Error} if the code is not listed in typeOfRecordHash
 */
function mapTypeOfRecord(typeOfRecord) {
  if (typeOfRecord in typeOfRecordHash) {
    return {level: typeOfRecordHash[typeOfRecord], code: typeOfRecord};
  }

  throw new Error(`Invalid record type ${typeOfRecord}`);
}

/**
 * Map a LDR/07 code to a {level, code} pair.
 * @param {string} bibliographicLevel - single character code from the leader
 * @returns {{level: string, code: string}} description and the original code
 * @throws {Error} if the code is not listed in bibliographicLevelHash
 */
function mapBibliographicLevel(bibliographicLevel) {
  if (bibliographicLevel in bibliographicLevelHash) {
    return {level: bibliographicLevelHash[bibliographicLevel], code: bibliographicLevel};
  }

  throw new Error('Invalid record bib level');
}

/**
 * Map a LDR/17 code to a {level, code} pair.
 * @param {string} encodingLevel - single character code from the leader
 * @returns {{level: string, code: string}} description and the original code
 * @throws {Error} if the code is not listed in encodingLevelHash
 */
function mapEncodingLevel(encodingLevel) {
  if (encodingLevel in encodingLevelHash) {
    return {level: encodingLevelHash[encodingLevel], code: encodingLevel};
  }

  throw new Error('Invalid record completion level');
}

/**
 * Read the type of record (LDR/06) from a record.
 * @param {Object} record - record object with a `leader` string
 * @returns {{typeOfRecord: {level: string, code: string}}}
 * @throws {Error} if the leader holds an unknown type of record code
 */
export function getTypeOfRecord(record) {
  const recordTypeRaw = record.leader[6]; // eslint-disable-line prefer-destructuring
  return {typeOfRecord: mapTypeOfRecord(recordTypeRaw)};
}

/**
 * Read the bibliographic level (LDR/07) from a record.
 * @param {Object} record - record object with a `leader` string
 * @returns {{bibliographicLevel: {level: string, code: string}}}
 * @throws {Error} if the leader holds an unknown bibliographic level code
 */
export function getBibliographicLevel(record) {
  const recordBibLevelRaw = record.leader[7]; // eslint-disable-line prefer-destructuring
  return {bibliographicLevel: mapBibliographicLevel(recordBibLevelRaw)};
}

/**
 * Read the encoding level (LDR/17) and the derived prepublication level from a record.
 * @param {Object} record - record object with a `leader` string and a `get(regexp)` method
 * @returns {{encodingLevel: {level: string, code: string}, prepublicationLevel: {code: string, level: string}}}
 * @throws {Error} if the leader holds an unknown encoding level code
 */
export function getRecordLevel(record) {
  const recordCompletionLevel = record.leader[17]; // eslint-disable-line prefer-destructuring
  return {
    encodingLevel: mapEncodingLevel(recordCompletionLevel),
    prepublicationLevel: getPrepublicationLevel(record, recordCompletionLevel)
  };
}

/**
 * Collect type of record, bibliographic level, encoding level and prepublication level of a record.
 * @param {Object} record - record object with a `leader` string and a `get(regexp)` method
 * @returns {Object} merged results of getTypeOfRecord, getBibliographicLevel and getRecordLevel
 */
export function getRecordInfo(record) {
  return {
    ...getTypeOfRecord(record),
    ...getBibliographicLevel(record),
    ...getRecordLevel(record)
  };
}

/**
 * Tell whether any subfield value in the given fields contains any of the phrases.
 * Fields without a subfield array and subfields without a string value are skipped,
 * so incomplete fields do not raise a TypeError.
 * @param {Array<Object>} fields - fields to scan
 * @param {Array<string>} phrases - case-sensitive substrings to look for
 * @returns {boolean}
 */
function fieldsContainPhrase(fields, phrases) {
  return fields.some(f => Array.isArray(f.subfields) &&
    f.subfields.some(sf => typeof sf.value === 'string' && phrases.some(phrase => sf.value.includes(phrase))));
}

// PrepublicationLevel should probably be renamed secondaryEncodingLevel or something like that, because
// "Koneellisesti tuotettu tietue" records with encodingLevel "2" are not prepublication records as such
/**
 * Derive the prepublication level from the notes in fields 500 and 594.
 * The checks run from the most specific phrase to the least specific one, because
 * 'TARKISTETTU ENNAKKOTIETO' also contains 'ENNAKKOTIETO'.
 * @param {Object} record - record object with a `get(regexp)` method
 * @param {string} [encodingLevel='8'] - LDR/17 code of the record
 * @returns {{code: string, level: string}} code '0' means "not a prepublication"
 */
function getPrepublicationLevel(record, encodingLevel = '8') {
  const fields = record.get(/^(?:500|594)$/u);

  // NOTE(review): if record.get() returns an empty array when nothing matches (TODO confirm),
  // this branch is always taken and the fallback after it is never reached.
  if (fields) {
    if (fieldsContainPhrase(fields, ['Koneellisesti tuotettu tietue'])) {
      return {code: '1', level: 'Koneellisesti tuotettu tietue'};
    }

    if (fieldsContainPhrase(fields, ['TARKISTETTU ENNAKKOTIETO', 'Tarkistettu ennakkotieto'])) {
      return {code: '2', level: 'TARKISTETTU ENNAKKOTIETO'};
    }

    if (fieldsContainPhrase(fields, ['ENNAKKOTIETO', 'Ennakkotieto'])) {
      return {code: '3', level: 'ENNAKKOTIETO'};
    }

    // If our encLevel is '8' (for actual prepublication records), let's give a lower prepubLevel if information is not found
    if (encodingLevel === '8') {
      return {code: '3', level: 'No prepublication type found'};
    }

    return {code: '0', level: 'Not a prepublication'};
  }

  // If our encLevel is '8' (for actual prepublication records), let's give a lower prepubLevel if information is not found
  if (encodingLevel === '8') {
    return {code: '3', level: 'No 500 or 594 fields found, cannot determine prepublication type'};
  }

  return {code: '0', level: 'Not a prepublication'};
}

/**
 * Compare the `code` of two values, optionally using a preference list.
 * @param {{code: *}} valueA - value from record A
 * @param {{code: *}} valueB - value from record B
 * @param {Array} [rateArray] - codes in preference order, best first
 * @returns {boolean|string} true when the codes are equal; 'A' or 'B' when rateArray prefers one of them;
 *   false when the codes differ and no preference can be determined
 */
// eslint-disable-next-line max-statements
function rateValues(valueA, valueB, rateArray) {
  debugDev('%o vs %o', valueA, valueB);

  if (valueA.code === valueB.code) {
    debugDev('Both same: returning true');
    return true;
  }

  if (rateArray) {
    // Preference array, [0] is the best (=1). A rating of 0 means the code is not in the array.
    const ratingOfA = rateArray.indexOf(valueA.code) + 1;
    const ratingOfB = rateArray.indexOf(valueB.code) + 1;

    if (ratingOfA === 0) {
      if (ratingOfB !== 0) {
        debugDev('A\'s value not found in array. Return B');
        return 'B';
      }
      // Neither value was found in the array
      return false;
    }

    if (ratingOfB === 0) {
      debugDev('B\'s value not found in array. Return A');
      return 'A';
    }

    if (ratingOfA < ratingOfB) {
      debugDev('A better: returning A');
      return 'A';
    }

    debugDev('B better: returning B');
    return 'B';
  }

  debugDev('Both different: returning false');
  return false;
}

/**
 * Validate that two records have the same type of record. No preference is produced.
 * @param {{code: string}} a - typeOfRecord of record A
 * @param {{code: string}} b - typeOfRecord of record B
 * @returns {boolean} true when the codes match, otherwise false
 */
function compareTypeOfRecord(a, b) {
  debugDev('Record A type: %o', a);
  debugDev('Record B type: %o', b);

  // rateValues, no rateArray: no preference, just validation false - true
  return rateValues(a, b);
}

/**
 * Validate that two records have the same bibliographic level. No preference is produced.
 * @param {{code: string}} a - bibliographicLevel of record A
 * @param {{code: string}} b - bibliographicLevel of record B
 * @returns {boolean} true when the codes match, otherwise false
 */
function compareBibliographicLevel(a, b) {
  debugDev('Record A bib level: %o', a);
  debugDev('Record B bib level: %o', b);

  // rateValues, no rateArray: no preference, just validation false - true
  return rateValues(a, b);
}

/**
 * Compare the encoding levels of two records and decide which one is preferred.
 * When both records have encoding level '8' or both have '2', and both prepublication levels are
 * available, the prepublication levels are compared instead; if those are equal and not '0',
 * the record sources decide.
 * @param {{code: string}} a - encodingLevel of record A
 * @param {{code: string}} b - encodingLevel of record B
 * @param {{code: string}} [prePubA] - prepublicationLevel of record A
 * @param {{code: string}} [prePubB] - prepublicationLevel of record B
 * @param {string} [recordSourceA] - source of record A ('incomingRecord' is preferred over 'databaseRecord')
 * @param {string} [recordSourceB] - source of record B
 * @returns {boolean|string} see rateValues
 */
// eslint-disable-next-line max-params
function compareEncodingLevel(a, b, prePubA, prePubB, recordSourceA, recordSourceB) {
  debugDev('Record A completion level: %o', a);
  debugDev('Record B completion level: %o', b);
  nvdebug(prePubA ? `Record A prepub level: ${JSON.stringify(prePubA)}` : 'N/A', debugDev);
  nvdebug(prePubB ? `Record B prepub level: ${JSON.stringify(prePubB)}` : 'N/A', debugDev);
  nvdebug(recordSourceA ? `Record A external type: ${JSON.stringify(recordSourceA)}` : 'N/A', debugDev);
  nvdebug(recordSourceB ? `Record B external type: ${JSON.stringify(recordSourceB)}` : 'N/A', debugDev);

  // Both prepublication levels must be present for the '8' pair AND for the '2' pair.
  // The earlier form `x && y && (8 && 8) || (2 && 2)` skipped the presence check for the '2' pair
  // because && binds tighter than ||, so rateValues() crashed on a missing prepublication level.
  if (prePubA && prePubB && a.code === b.code && ['8', '2'].includes(a.code)) {
    // Handle exception first: all prepublications are not equal!
    const prePubValue = rateValues(prePubA, prePubB, ['0', '1', '2', '3']);

    // we'll check recordSource only if we have '8' or '2' records which have same prePubValue
    // and prepubLevel is something else than '0' (not a prepublication)
    if (prePubValue === true && prePubA.code !== '0' && prePubB.code !== '0') {
      const valueA = {code: recordSourceA};
      const valueB = {code: recordSourceB};
      const rateArray = ['incomingRecord', 'databaseRecord', undefined];
      return rateValues(valueA, valueB, rateArray);
    }

    return prePubValue;
  }

  // Note: For record import stuff we'll probably have 'Koneellisesti tuotettu tietue' encoding level as '2' - this needs to be reorganized!
  // Best first, see encodingLevelHash above.
  // Previous order: [' ', '1', '2', '3', '4', '5', '7', 'u', 'z', '8']
  const rateArray = [' ', '1', '4', '5', '2', '7', '3', 'u', 'z', '8']; // MET-145
  return rateValues(a, b, rateArray);
}

/**
 * Compare leader-derived values that have already been extracted from two records.
 * Record sources are not available here, so they are not passed to compareEncodingLevel.
 * @param {Object} recordValuesA - values of record A; the leader values are read from key '000'
 * @param {Object} recordValuesB - values of record B; the leader values are read from key '000'
 * @returns {{typeOfRecord: boolean, bibliographicLevel: boolean, encodingLevel: (boolean|string)}}
 */
export function compareLeader(recordValuesA, recordValuesB) {
  const f000A = recordValuesA['000'];
  const f000B = recordValuesB['000'];

  return {
    typeOfRecord: compareTypeOfRecord(f000A.typeOfRecord, f000B.typeOfRecord),
    bibliographicLevel: compareBibliographicLevel(f000A.bibliographicLevel, f000B.bibliographicLevel),
    encodingLevel: compareEncodingLevel(f000A.encodingLevel, f000B.encodingLevel, f000A.prepublicationLevel, f000B.prepublicationLevel)
  };
}

/**
 * Check typeOfRecord (LDR/06) of two records.
 * @param {{record1: Object, record2: Object}} params
 * @returns {boolean} true when both records have the same type of record
 */
export function checkTypeOfRecord({record1, record2}) {
  const recordInfo1 = getTypeOfRecord(record1);
  const recordInfo2 = getTypeOfRecord(record2);
  return compareTypeOfRecord(recordInfo1.typeOfRecord, recordInfo2.typeOfRecord);
}

/**
 * Check bibliographicLevel (LDR/07) of two records.
 * @param {{record1: Object, record2: Object}} params
 * @returns {boolean} true when both records have the same bibliographic level
 */
export function checkBibliographicLevel({record1, record2}) {
  const recordInfo1 = getBibliographicLevel(record1);
  const recordInfo2 = getBibliographicLevel(record2);
  return compareBibliographicLevel(recordInfo1.bibliographicLevel, recordInfo2.bibliographicLevel);
}

/**
 * Check record encoding level + prepublication level, mostly for preference.
 * @param {Object} params
 * @param {Object} params.record1
 * @param {Object} params.record2
 * @param {Object} [params.record1External={}] - may carry `recordSource` for record1
 * @param {Object} [params.record2External={}] - may carry `recordSource` for record2
 * @returns {boolean|string} see rateValues
 */
export function checkRecordLevel({record1, record2, record1External = {}, record2External = {}}) {
  const recordInfo1 = getRecordLevel(record1);
  const recordInfo2 = getRecordLevel(record2);
  const recordSource1 = record1External.recordSource || undefined;
  const recordSource2 = record2External.recordSource || undefined;

  return compareEncodingLevel(recordInfo1.encodingLevel, recordInfo2.encodingLevel, recordInfo1.prepublicationLevel, recordInfo2.prepublicationLevel, recordSource1, recordSource2);
}

/**
 * Check all values from leader.
 * @param {Object} params
 * @param {Object} params.record1
 * @param {Object} params.record2
 * @param {boolean} [params.checkPreference=true] - when false, a passing check returns true instead of the preference
 * @param {Object} [params.record1External={}] - may carry `recordSource` for record1
 * @param {Object} [params.record2External={}] - may carry `recordSource` for record2
 * @returns {boolean|string} false when type of record, bibliographic level or encoding level check fails;
 *   otherwise the encoding level preference (true, 'A' or 'B'), or plain true when checkPreference is false
 */
export function checkLeader({record1, record2, checkPreference = true, record1External = {}, record2External = {}}) {
  const recordInfo1 = getRecordInfo(record1);
  const recordInfo2 = getRecordInfo(record2);
  const recordSource1 = record1External.recordSource || undefined;
  const recordSource2 = record2External.recordSource || undefined;

  debugDev(`checkLeader()`);

  // DEVELOP: this could use checkTypeOfRecord?
  if (recordInfo1.typeOfRecord.code !== recordInfo2.typeOfRecord.code) {
    debugDev(`LDR: type of record failed!`);
    return false;
  }

  // DEVELOP: this could use checkBibliographicLevel?
  if (recordInfo1.bibliographicLevel.code !== recordInfo2.bibliographicLevel.code) {
    debugDev(`LDR: bibliographical level failed!`);
    return false;
  }

  const encodingLevelPreference = compareEncodingLevel(recordInfo1.encodingLevel, recordInfo2.encodingLevel, recordInfo1.prepublicationLevel, recordInfo2.prepublicationLevel, recordSource1, recordSource2);
  if (encodingLevelPreference === false) {
    debugDev(`LDR: encoding level failed!`);
    return false;
  }

  return checkPreference ? encodingLevelPreference : true;

  // NB! Should we handle LDR/05 (record status) value p - Increase in encoding level from prepublication?
}

/*
// An old comment with updated keys:
'000': {
  'bibliographicLevel': true,
  'encodingLevel': 'A', // A has better value
  'recordState': true, // What is this? Probably something that got dropped later on...
  'typeOfRecord': true
}
*/