@natlibfi/melinda-record-match-validator
Version:
Validates if two records matched by melinda-record-matching can be merged and sets merge priority
547 lines (499 loc) • 22.3 kB
JavaScript
/* eslint-disable max-lines */
import createDebugLogger from 'debug';
import {isDeletedRecord, isTestRecord, isComponentRecord} from '@natlibfi/melinda-commons';
import {checkSID} from './compareFunctions/compareFieldSID.js';
import {checkLOW, checkLOWinternal} from './compareFunctions/compareFieldLOW.js';
import {checkCAT} from './validators/fieldCAT.js';
import {check040b, check040e} from './compareFunctions/compareField040.js';
import {checkAllTitleFeatures} from './compareFunctions/compareTitle.js';
import {checkPublisher} from './compareFunctions/compareField26X.js';
//import {getSubfieldValues} from './collectFunctions/collectUtils.js';
//import {collectRecordValues} from './collectRecordValues.js';
//import {compareRecordValues} from './compareRecordValues.js';
//import {validateCompareResults} from './validateRecordCompareResults.js';
import {check042} from './compareFunctions/compareField042.js';
import {check336, check337, check338} from './compareFunctions/compareField33X.js';
import {check773, check773Internal} from './validators/field773.js';
//import {check984} from './compareFunctions/compareField984.js';
import {checkLeader, checkTypeOfRecord, checkRecordLevel, checkBibliographicLevel} from './compareFunctions/compareLeader.js';
import {check005, check008} from './compareFunctions/compareControlFields.js';
import {compareRecordsPartSetFeatures} from './validators/partsAndSets.js';
import {performAudioSanityCheck} from './compareFunctions/sanityCheckAudio.js';
import {performDaisySanityCheck} from './compareFunctions/sanityCheckDaisy.js';
import {performDvdSanityCheck} from './compareFunctions/sanityCheckDvd.js';
import {performIsbnQualifierCheck} from './compareFunctions/sanityCheckIsbnQualifer.js';
//import {checkLanguage} from './validators/language.js';
import { getCheckFeature } from './validators/matchingFeatureChecks.js';
//import { checkAllFeatures } from './validators/matchingFeatureChecks.js';
const debug = createDebugLogger('@natlibfi/melinda-record-match-validator:index');
const debugDev = debug.extend('dev');
//const debugData = debug.extend('data');
function checkExistence({record1, record2}) {
if (record1 === undefined || record2 === undefined) {
return false;
}
if (isDeletedRecord(record1) || isDeletedRecord(record2)) {
return false;
}
return true;
}
function checkTestRecord({record1, record2}) {
if (isTestRecord(record1) !== isTestRecord(record2)) {
return false;
}
return true;
}
function checkHostComponent({record1, record2}) {
if (isComponentRecord(record1, false, ['973']) !== isComponentRecord(record2, false, ['973'])) {
return false;
}
return true;
}
// DEVELOP: move comparisonTask metadata mainly to the function files
// DEVELOP: add tag information for highlighting problematic fields / positions / subfields for human user
// DEVELOP: multilingual human readable messages
/*
name: short name for comparison task
description: longer, original description for comparison task
validation: comparison task is used for validation
preference: comparison task is used for choosing preferred record
manual: is comparison check usable in manual (internal) merge, type of fail if used [true/error/warning/false], defaults to: true/error if undefined
internal: is comparison check usable in internal merge (ie. merging two database records) [true/false], defaults to true if undefined
import: is comparison check usable in import merge (ie. merging an incoming record and a database record) [true/false], defaults to true if undefined
preference_message_fi: human readable message in Finnish for comparing record preference for merging
validation_message_fi: human readable message in Finnish for validating records for merging
DEVELOP: tags: tags to highlight fields/subfields/positions of fields and field parts that caused the matchValidation error/warning
*/
const comparisonTasks = [ // NB! These are/should be in priority order for recordImport, which checks only until first failure!
// undefined or deleted records cannot be merged (both automatic and human merge)
{'name': 'existence',
'description': 'existence (validation only)',
'function': checkExistence,
'validation': true,
'preference': false,
'preference_message_fi': '',
'validation_message_fi': 'poistettuja tietueita ei voi yhdistää',
'tags': [{'tag': 'STA'}, {'tag': 'DEL'}, {'tag': 'LDR', 'chars': ['5']}]},
// test records and non test records should not be merged
{'name': 'test record',
'description': 'test record',
'function': checkTestRecord,
'validation': true,
'preference': false,
'import': true,
'internal': true,
'manual': 'error',
'preference_message_fi': '',
'validation_message_fi': 'testitietuetta ja normaalia tietuetta ei voi yhdistää',
'tags': [{'tag': 'STA'}]},
// host and component records should not be merged
{'name': 'host/component',
'description': 'host/component record',
'function': checkHostComponent,
'validation': true,
'preference': false,
'import': true,
'internal': true,
'manual': 'error',
'preference_message_fi': '',
'validation_message_fi': 'osakohdetta ja ei-osakohdetta ei voi yhdistää',
'tags': [{'tag': '773'}, {'tag': '973'}, {'tag': 'LDR', 'chars': ['7']}]},
// checks record type LDR/06 && bibliographic level LDR/07 (validation) and LDR/17 for encoding level (preference)s
// - fail merge if LDR/006-7 are mismatch
// - preference based on encoding level and more nuanced prepublication level for prepub records
// Prioritize LDR/17 (encoding level)
{'name': 'leader',
'description': 'leader (validation and preference)',
'function': checkLeader,
'validation': true,
'preference': true,
'manual': false,
'import': true,
'internal': true,
'validation_message_fi': 'ainestotyypiltään tai bibliografiselta (LDR/06-07) tasoltaan eroavia tietueita ei voi yhdistää',
'preference_message_fi': 'suosi koodaus- ja ennakkotietotasoltaan (LDR/17) parempaa tietuetta'},
// Singular leader comparisons for Human/internal merge
// leader typeOfRecord LDR/006
// do not use same time as checkLeader that checks all three leader values
// we need to error these in MergeUI as mergeReducers refuse to handle records with differing LDR/06
{'name': 'typeOfRecord',
'description': 'leader: typeOfRecord (validation)',
'function': checkTypeOfRecord,
'validation': true,
'preference': false,
//'manual': 'warning',
'manual': 'error',
'import': false,
'internal': true,
'validation_message_fi': 'aineistotyypiltään (LDR/06) eroavia tietueita ei voi yhdistää',
'preference_message_fi': ''},
// leader bibliographicLevel LDR/007
// do not use same time as checkLeader that checks all three leader values
// Currently not in use, we check components with different check
{'name': 'bibliographicLevel',
'description': 'leader: bibliographicLevel (validation)',
'function': checkBibliographicLevel,
'validation': true,
'preference': false,
'import': false,
'internal': false,
'manual': 'error',
'validation_message_fi': 'bibliografiselta tasoltaan (LDR/07) eroavia tietueita ei voi yhdistää',
'preference_message_fi': ''},
// leader encodingLevel LDR/017 + f500/f594
// do not use same time as checkLeader that checks all three leader values
{'name': 'recordLevel',
'description': 'leader + 500/594: recordLevel (preference)',
'function': checkRecordLevel,
'validation': false,
'preference': true,
'import': false,
'internal': true,
'manual': 'warning',
'validation_message_fi': '',
'preference_message_fi': 'suosi koodaus- ja ennakkotietotasoltaan parempaa tietuetta (LDR/17)'},
// just preference also for human merge (we like records with 264 instead of 260, they are probably more RDA-compatible)
// Bit high on the preference list, isn't it?
{'name': 'RDA from publisher',
'description': 'publisher (264>260) (preference only)',
'function': checkPublisher,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': false, // let's not give too many preference warnings for a human user
'preference_message_fi': 'suosi tietuetta, jossa julkaisutiedot ovat kentässä 264',
'validation_message_fi': ''},
// what are we checking here? could probably be a warning for human merge
// - fail merging online and direct using electronical resources (008/23 or 008/29 form of item)
// - fail merge if 008/06 type of date/publication status codes are a severe mismatch
// - preference from 008/06 type of date/publication status codes
// - gathers 008/39 cataloiguingSource, but does do anything with it?
{'name': 'f008',
'description': '008 test (validation and preference)',
'function': check008,
'validation': true,
'preference': true,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': 'suosi tietuetta, jossa on tarkemmin ilmoitettu julkaisuajan tyyppi/julkaisun tila (008/06)',
'validation_message_fi': 'tietueita, joissa on ristiriitainen julkaisuajan tyyppi/julkaisun tila (008/06) ei voi yhdistää'},
// This test checks is just for preference despite its description!
// Priority order: FIKKA > ANY > NONE
{'name': 'LOW-for-preference',
'description': 'LOW test (preference)',
'function': checkLOW,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': true,
'preference_message_fi': 'suosi tietuetta, jossa on Kansalliskirjaston tietokantatunnus (LOW) (tai tietuetta, jossa ylipäänsä on joku tietokantatunnus)',
'validation_message_fi': ''},
// database internal merge cannot merge two records with same low
{'name': 'LOW-validation-for-internal',
'description': 'LOW test (validation for internal)',
'function': checkLOWinternal,
'validation': true,
'preference': false,
'import': false,
'internal': true,
'manual': 'error',
'preference_message_fi': '',
'validation_message_fi': 'tietueita, joissa on saman paikalliskannan tietokantatunnus (LOW), ei voi yhdistää'},
// This test check 042 to preference
{'name': 'f042-authentication-code',
'description': 'field 042: authentication code (preference only)',
'function': check042,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': true,
'preference_message_fi': 'suosi tietuetta, jossa on Kansallisbibliografian tai Kansallisdiskografian autentikaatiokoodi (042)',
'validation_message_fi': ''},
{'name': 'CAT',
'description': 'CAT test (preference only)',
'function': checkCAT,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': false, // let's not give too many preference warnings for a human cataloger
'preference_message_fi': 'suosi tietuetta, jolla on paremmat kuvailuhistoriatiedot',
'validation_message_fi': ''},
// NB! I'd like to have a test for 008/06, but them specs for it are elusive?
{'name': 'title',
'description': 'field 245 (title)',
'function': checkAllTitleFeatures,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, niiden nimeketiedot (245) eroavat'},
// Do not use old check f245 same time as checkAllTitleFeatures
//{'name': 'title-old', 'description': 'field 245 (title)', 'function': check245, 'validation': true, 'preference': false, 'manual': 'warning'},
// human merge: warning
{'name': 'f336',
'description': 'field 336 (content type) test (validation and preference)',
'function': check336,
'validation': true,
'preference': true,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': 'suosi tietuetta, jolla on tarkemmat sisältötyyppitiedot (336)',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, niiden sisältötyyppitiedot (336) eroavat'},
// human merge: warning
{'name': 'f337',
'description': 'field 337 (media type) test (validation and preference)',
'function': check337,
'validation': true,
'preference': true,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': 'suosi tietuetta, jolla on tarkemmat mediatyyppitiedot (337)',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, niiden mediatyyppitiedot (337) eroavat'},
// human merge: warning
{'name': 'f338',
'description': 'field 338 (carrier type) test (validation and preference)',
'function': check338,
'validation': true,
'preference': true,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': 'suosi tietuetta, jolla on tarkemmat tallennetyyppitiedot (338)',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, niiden tallennetyyppitiedot (338) eroavat'},
// human merge: warning for subfields q&g - $w actually should be different ...
{'name': 'f773-for-internal',
'description': '773 $wgq test (validation only)',
'function': check773Internal,
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, osakohteen sijaintitiedot eroavat (773)'},
{'name': 'f773-for-import',
'description': '773 $wgq test (validation only)',
'function': check773,
'validation': true,
'preference': false,
'internal': false,
'import': true,
'manual': false,
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, osakohteen sijaintitiedot eroavat (773)'},
{'name': 'f040b',
'description': '040$b (language of cataloging) (preference only)',
'function': check040b,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': true,
'preference_message_fi': 'suosi tietuetta, jolla on soveltuvin kuvailukieli (040)',
'validation_message_fi': ''},
{'name': 'f040e',
'description': '040$e (description conventions) (preference only)',
'function': check040e,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': true,
'preference_message_fi': 'suosi tietuetta, jonka kuvailusäännöiksi on merkitty RDA (040)',
'validation_message_fi': ''},
// SID for import (do not use for manual database internal merge)
// - fail merge for different SIDs from same database
// set preference for record that has most commons SIDs
{'name': 'fSID-for-import',
'description': 'SID test (validation and preference), for import only',
'function': checkSID,
'validation': true,
'preference': true,
'internal': false,
'import': true,
'manual': false,
'preference_message_fi': 'suosi tietuetta, jolla on enemmän linkkejä vastintietueisiin paikalliskannoissa',
'validation_message_fi': 'tietueita, joilla on samassa paikalliskannassa eri vastintietue ei voi yhdistää (SID)'},
// preference for record that's updated more recently
{'name': 'f005',
'description': '005 timestamp test (preference)',
'function': check005,
'validation': false,
'preference': true,
'internal': true,
'import': true,
'manual': false, // let's not give too many preference warnings for a human cataloger
'preference_message_fi': 'suosi tietuetta, jota on päivitetty viimeksi',
'validation_message_fi': ''},
// human merge: warning
// - fail merge, for CD vs LP record
{'name': 'audio-sanity',
'description': 'audio sanity check (validation only)',
'function': performAudioSanityCheck,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tietueissa on kuvailtu CD- ja LP-levy, tarkista voiko ne yhdistää'},
// human merge: warning
// - fail merge, for daisy-audiobook vs generic audiobook
{'name': 'daisy-sanity',
'description': 'Daisy sanity check (validation only)',
'function': performDaisySanityCheck,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tietueissa on kuvailtu yleinen ja Daisy-äänikirja, tarkista voiko ne yhdistää'},
// human merge: warning
// - fail merge, for DVD vs Blueray video discs
{'name': 'dvd-blueray-sanity',
'description': 'DVD vs Blu-Ray sanity check (validation only)',
'function': performDvdSanityCheck,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tietueissa on kuvailtu DVD- ja Bluray-levy, tarkista voiko ne yhdistää'},
// human merge: warning
// - fail merge, for mismatching ISBN qualifiers
{'name': 'isbn-qualifier',
'description': 'ISBN qualifier sanity check (validation only)',
'function': performIsbnQualifierCheck,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tietueissa on eroava ISBN-tarkenne (020), tarkista voiko ne yhdistää'},
// human merge: warning
// - fail merge, part of a multipart monograph vs whole set of multipart monographs
{'name': 'parts-sets',
'description': 'Parts vs sets test (validation)',
'function': compareRecordsPartSetFeatures,
'validation': true,
'preference': false,
'internal': true,
'import': true,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tietueissa on kuvailtu yksittäinen moniosaisen monografian osa ja moniosainen monografia kokonaisuutena, tarkista voiko ne yhdistää'},
// human merge: warning
// import: do not use, this is done in matcher
// - fail merge, if languages in records differ too much
{'name': 'language',
'description': 'Language (validation)',
'function': getCheckFeature({featureName: 'language'}),
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää, kielitiedot eroavat (008, 041)'},
// human merge: warning
// import: do not use, this is done in matcher
// - warn if ISBNs differ too much
{'name': 'ISBN',
'description': 'ISBN (validation)',
'function': getCheckFeature({featureName: 'isbn'}),
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää: eroava ISBN (020)'},
// human merge: warning
// import: do not use, this is done in matcher
// - warn if ISSNs differ too much
{'name': 'ISSN',
'description': 'ISSN (validation)',
'function': getCheckFeature({featureName: 'issn'}),
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää: eroava ISSN (022)'},
// human merge: warning
// import: do not use, this is done in matcher
// - warn if ISBNs differ too much
{'name': 'otherStandardIdentifier',
'description': 'otherStandardIdentifier (validation)',
'function': getCheckFeature({featureName: 'otherStandardIdentifier'}),
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'tarkista voiko tietueet yhdistää: eroava muu standarditunniste (024)'}
/*
{'name': 'allMatchingFeatures',
'description': 'allMatchingFeatures (validation)',
'function': checkAllFeatures,
'validation': true,
'preference': false,
'internal': true,
'import': false,
'manual': 'warning',
'preference_message_fi': '',
'validation_message_fi': 'foobar'},
*/
];
export const comparisonTasksTable = {
recordImport: [...comparisonTasks].filter(isUsableForImport),
// merge two records existing in database together, checked by human user in UI
humanMerge: [...comparisonTasks.filter(isUsableForInternal).filter(isUsableForManual)]
};
debugDev(`------------ RECORD IMPORT --------`);
debugDev(`comparisonTasksTable.recordImport has ${comparisonTasksTable.recordImport.length} comparison tasks:`);
debugDev(`${comparisonTasksTable.recordImport.map((task) => task.description).join('\n')}`);
debugDev(`------------ HUMAN MERGE --------`);
debugDev(`comparisonTasksTable.humanMerge has ${comparisonTasksTable.humanMerge.length} comparison tasks:`);
debugDev(`${comparisonTasksTable.humanMerge.map((task) => task.description).join('\n')}`);
// Manual merge: merge done manually in an UI
function isUsableForManual(task) {
if (task.manual !== undefined && task.manual === false) {
debugDev(`${task.name} has manual: ${task.manual}`);
return false;
}
debugDev(`${task.name} has manual: ${task.manual}`);
return true;
}
// Internal merge: merging two records in the database together
function isUsableForInternal(task) {
if (task.internal !== undefined && task.internal === false) {
debugDev(`${task.name} has internal: ${task.internal}`);
return false;
}
debugDev(`${task.name} has internal: ${task.internal}`);
return true;
}
// Import merge: merging incoming record and database record together
function isUsableForImport(task) {
if (task.import !== undefined && task.import === false) {
debugDev(`${task.name} has import: ${task.import}`);
return false;
}
debugDev(`${task.name} has import: ${task.import}`);
return true;
}