UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

542 lines (541 loc) 20.8 kB
import createDebugLogger from "debug";
import { isDeletedRecord, isTestRecord, isComponentRecord } from "@natlibfi/melinda-commons";
import { checkSID } from "./compareFunctions/compareFieldSID.js";
import { checkLOW, checkLOWinternal } from "./compareFunctions/compareFieldLOW.js";
import { checkCAT } from "./validators/fieldCAT.js";
import { check040b, check040e } from "./compareFunctions/compareField040.js";
import { checkAllTitleFeatures } from "./compareFunctions/compareTitle.js";
import { checkPublisher } from "./compareFunctions/compareField26X.js";
import { check042 } from "./compareFunctions/compareField042.js";
import { check336, check337, check338 } from "./compareFunctions/compareField33X.js";
import { check773, check773Internal } from "./validators/field773.js";
import { checkLeader, checkTypeOfRecord, checkRecordLevel, checkBibliographicLevel } from "./compareFunctions/compareLeader.js";
import { check005, check008 } from "./compareFunctions/compareControlFields.js";
import { compareRecordsPartSetFeatures } from "./validators/partsAndSets.js";
import { performAudioSanityCheck } from "./compareFunctions/sanityCheckAudio.js";
import { performDaisySanityCheck } from "./compareFunctions/sanityCheckDaisy.js";
import { performDvdSanityCheck } from "./compareFunctions/sanityCheckDvd.js";
import { performIsbnQualifierCheck } from "./compareFunctions/sanityCheckIsbnQualifer.js";
import { getCheckFeature } from "./validators/matchingFeatureChecks.js";

const debug = createDebugLogger("@natlibfi/melinda-record-match-validator:index");
const debugDev = debug.extend("dev");

/**
 * Validation: both records must be defined and neither may be a deleted record.
 *
 * @param {{record1: object, record2: object}} records - the record pair under comparison
 * @returns {boolean} false if either record is undefined or deleted, true otherwise
 */
function checkExistence({ record1, record2 }) {
  if (record1 === undefined || record2 === undefined) {
    return false;
  }
  return !isDeletedRecord(record1) && !isDeletedRecord(record2);
}

/**
 * Validation: a test record may only be paired with another test record.
 *
 * @param {{record1: object, record2: object}} records - the record pair under comparison
 * @returns {boolean} true when isTestRecord() gives the same answer for both records
 */
function checkTestRecord({ record1, record2 }) {
  return isTestRecord(record1) === isTestRecord(record2);
}

/**
 * Validation: a component record may only be paired with another component record.
 *
 * @param {{record1: object, record2: object}} records - the record pair under comparison
 * @returns {boolean} true when isComponentRecord() gives the same answer for both records
 */
function checkHostComponent({ record1, record2 }) {
  // NOTE(review): the `false` and ["973"] arguments are passed through unchanged to
  // isComponentRecord(); presumably 973 is an additional host-link tag - confirm against melinda-commons.
  return isComponentRecord(record1, false, ["973"]) === isComponentRecord(record2, false, ["973"]);
}

/**
 * Logs the value of one usage flag of a task and tells whether the task is enabled for that usage.
 * Only an explicit `false` disables a task; a missing flag or any other value keeps it enabled.
 *
 * @param {object} task - an entry of comparisonTasks
 * @param {"manual"|"internal"|"import"} flag - name of the usage flag to inspect
 * @returns {boolean} false only when task[flag] is strictly false
 */
function isEnabledFor(task, flag) {
  debugDev(`${task.name} has ${flag}: ${task[flag]}`);
  return task[flag] !== false;
}

/**
 * @param {object} task - an entry of comparisonTasks
 * @returns {boolean} false only when task.manual is strictly false
 */
function isUsableForManual(task) {
  return isEnabledFor(task, "manual");
}

/**
 * @param {object} task - an entry of comparisonTasks
 * @returns {boolean} false only when task.internal is strictly false
 */
function isUsableForInternal(task) {
  return isEnabledFor(task, "internal");
}

/**
 * @param {object} task - an entry of comparisonTasks
 * @returns {boolean} false only when task.import is strictly false
 */
function isUsableForImport(task) {
  return isEnabledFor(task, "import");
}

// All known comparison tasks.
// NB! These are/should be in priority order for recordImport, which checks only until first failure!
//
// Usage flags read in this file:
//   "import":   false => task is left out of comparisonTasksTable.recordImport
//   "internal": false => task is left out of comparisonTasksTable.humanMerge
//   "manual":   false => task is left out of comparisonTasksTable.humanMerge
//               (other values seen here: "error", "warning", true - interpreted by the consumer of the table)
// A task without a flag is treated as enabled for that usage.
const comparisonTasks = [
  // undefined or deleted records cannot be merged (both automatic and human merge)
  {
    "name": "existence",
    "description": "existence (validation only)",
    "function": checkExistence,
    "validation": true,
    "preference": false,
    "preference_message_fi": "",
    "validation_message_fi": "poistettuja tietueita ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "STA" }, { "tag": "DEL" }, { "tag": "LDR", "chars": ["5"] }]
  },
  // test records and non test records should not be merged
  {
    "name": "test record",
    "description": "test record",
    "function": checkTestRecord,
    "validation": true,
    "preference": false,
    "import": true,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "testitietuetta ja normaalia tietuetta ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "STA" }]
  },
  // host and component records should not be merged
  {
    "name": "host/component",
    "description": "host/component record",
    "function": checkHostComponent,
    "validation": true,
    "preference": false,
    "import": true,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "osakohdetta ja ei-osakohdetta ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "773" }, { "tag": "973" }, { "tag": "LDR", "chars": ["7"] }]
  },
  // checks record type LDR/06 && bibliographic level LDR/07 (validation) and LDR/17 for encoding level (preference)
  // - fail merge if LDR/06-07 are mismatch
  // - preference based on encoding level and more nuanced prepublication level for prepub records
  // Prioritize LDR/17 (encoding level)
  {
    "name": "leader",
    "description": "leader (validation and preference)",
    "function": checkLeader,
    "validation": true,
    "preference": true,
    "manual": false,
    "import": true,
    "internal": true,
    // spelling fixed ("ainesto..." -> "aineisto...") to match the typeOfRecord message below
    "validation_message_fi": "aineistotyypilt\xE4\xE4n tai bibliografiselta (LDR/06-07) tasoltaan eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": "suosi koodaus- ja ennakkotietotasoltaan (LDR/17) parempaa tietuetta"
  },
  // Singular leader comparisons for Human/internal merge
  // leader typeOfRecord LDR/06
  // do not use same time as checkLeader that checks all three leader values
  // we need to error these in MergeUI as mergeReducers refuse to handle records with differing LDR/06
  {
    "name": "typeOfRecord",
    "description": "leader: typeOfRecord (validation)",
    "function": checkTypeOfRecord,
    "validation": true,
    "preference": false,
    //'manual': 'warning',
    "manual": "error",
    "import": false,
    "internal": true,
    "validation_message_fi": "aineistotyypilt\xE4\xE4n (LDR/06) eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": ""
  },
  // leader bibliographicLevel LDR/07
  // do not use same time as checkLeader that checks all three leader values
  // Currently not in use, we check components with different check
  {
    "name": "bibliographicLevel",
    "description": "leader: bibliographicLevel (validation)",
    "function": checkBibliographicLevel,
    "validation": true,
    "preference": false,
    "import": false,
    "internal": false,
    "manual": "error",
    "validation_message_fi": "bibliografiselta tasoltaan (LDR/07) eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": ""
  },
  // leader encodingLevel LDR/17 + f500/f594
  // do not use same time as checkLeader that checks all three leader values
  {
    "name": "recordLevel",
    "description": "leader + 500/594: recordLevel (preference)",
    "function": checkRecordLevel,
    "validation": false,
    "preference": true,
    "import": false,
    "internal": true,
    "manual": "warning",
    "validation_message_fi": "",
    "preference_message_fi": "suosi koodaus- ja ennakkotietotasoltaan parempaa tietuetta (LDR/17)"
  },
  // just preference also for human merge (we like records with 264 instead of 260, they are probably more RDA-compatible)
  // Bit high on the preference list, isn't it?
  {
    "name": "RDA from publisher",
    "description": "publisher (264>260) (preference only)",
    "function": checkPublisher,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false, // let's not give too many preference warnings for a human user
    "preference_message_fi": "suosi tietuetta, jossa julkaisutiedot ovat kent\xE4ss\xE4 264",
    "validation_message_fi": ""
  },
  // what are we checking here? could probably be a warning for human merge
  // - fail merging online and direct using electronical resources (008/23 or 008/29 form of item)
  // - fail merge if 008/06 type of date/publication status codes are a severe mismatch
  // - preference from 008/06 type of date/publication status codes
  // - gathers 008/39 cataloguingSource, but doesn't do anything with it?
  {
    "name": "f008",
    "description": "008 test (validation and preference)",
    "function": check008,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jossa on tarkemmin ilmoitettu julkaisuajan tyyppi/julkaisun tila (008/06)",
    "validation_message_fi": "tietueita, joissa on ristiriitainen julkaisuajan tyyppi/julkaisun tila (008/06) ei voi yhdist\xE4\xE4"
  },
  // This test is just for preference despite its description!
  // Priority order: FIKKA > ANY > NONE
  {
    "name": "LOW-for-preference",
    "description": "LOW test (preference)",
    "function": checkLOW,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jossa on Kansalliskirjaston tietokantatunnus (LOW) (tai tietuetta, jossa ylip\xE4\xE4ns\xE4 on joku tietokantatunnus)",
    "validation_message_fi": ""
  },
  // database internal merge cannot merge two records with same low
  {
    "name": "LOW-validation-for-internal",
    "description": "LOW test (validation for internal)",
    "function": checkLOWinternal,
    "validation": true,
    "preference": false,
    "import": false,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "tietueita, joissa on saman paikalliskannan tietokantatunnus (LOW), ei voi yhdist\xE4\xE4"
  },
  // This test checks 042 for preference
  {
    "name": "f042-authentication-code",
    "description": "field 042: authentication code (preference only)",
    "function": check042,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jossa on Kansallisbibliografian tai Kansallisdiskografian autentikaatiokoodi (042)",
    "validation_message_fi": ""
  },
  {
    "name": "CAT",
    "description": "CAT test (preference only)",
    "function": checkCAT,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false, // let's not give too many preference warnings for a human cataloger
    "preference_message_fi": "suosi tietuetta, jolla on paremmat kuvailuhistoriatiedot",
    "validation_message_fi": ""
  },
  // NB! I'd like to have a test for 008/06, but the specs for it are elusive?
  {
    "name": "title",
    "description": "field 245 (title)",
    "function": checkAllTitleFeatures,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden nimeketiedot (245) eroavat"
  },
  // Do not use old check f245 same time as checkAllTitleFeatures
  //{'name': 'title-old', 'description': 'field 245 (title)', 'function': check245, 'validation': true, 'preference': false, 'manual': 'warning'},
  // human merge: warning
  {
    "name": "f336",
    "description": "field 336 (content type) test (validation and preference)",
    "function": check336,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat sis\xE4lt\xF6tyyppitiedot (336)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden sis\xE4lt\xF6tyyppitiedot (336) eroavat"
  },
  // human merge: warning
  {
    "name": "f337",
    "description": "field 337 (media type) test (validation and preference)",
    "function": check337,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat mediatyyppitiedot (337)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden mediatyyppitiedot (337) eroavat"
  },
  // human merge: warning
  {
    "name": "f338",
    "description": "field 338 (carrier type) test (validation and preference)",
    "function": check338,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat tallennetyyppitiedot (338)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden tallennetyyppitiedot (338) eroavat"
  },
  // human merge: warning for subfields q&g - $w actually should be different ...
  {
    "name": "f773-for-internal",
    "description": "773 $wgq test (validation only)",
    "function": check773Internal,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, osakohteen sijaintitiedot eroavat (773)"
  },
  {
    "name": "f773-for-import",
    "description": "773 $wgq test (validation only)",
    "function": check773,
    "validation": true,
    "preference": false,
    "internal": false,
    "import": true,
    "manual": false,
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, osakohteen sijaintitiedot eroavat (773)"
  },
  {
    "name": "f040b",
    "description": "040$b (language of cataloging) (preference only)",
    "function": check040b,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jolla on soveltuvin kuvailukieli (040)",
    "validation_message_fi": ""
  },
  {
    "name": "f040e",
    "description": "040$e (description conventions) (preference only)",
    "function": check040e,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jonka kuvailus\xE4\xE4nn\xF6iksi on merkitty RDA (040)",
    "validation_message_fi": ""
  },
  // SID for import (do not use for manual database internal merge)
  // - fail merge for different SIDs from same database
  // - set preference for record that has most common SIDs
  {
    "name": "fSID-for-import",
    "description": "SID test (validation and preference), for import only",
    "function": checkSID,
    "validation": true,
    "preference": true,
    "internal": false,
    "import": true,
    "manual": false,
    "preference_message_fi": "suosi tietuetta, jolla on enemm\xE4n linkkej\xE4 vastintietueisiin paikalliskannoissa",
    "validation_message_fi": "tietueita, joilla on samassa paikalliskannassa eri vastintietue ei voi yhdist\xE4\xE4 (SID)"
  },
  // preference for record that's updated more recently
  {
    "name": "f005",
    "description": "005 timestamp test (preference)",
    "function": check005,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false, // let's not give too many preference warnings for a human cataloger
    "preference_message_fi": "suosi tietuetta, jota on p\xE4ivitetty viimeksi",
    "validation_message_fi": ""
  },
  // human merge: warning
  // - fail merge, for CD vs LP record
  {
    "name": "audio-sanity",
    "description": "audio sanity check (validation only)",
    "function": performAudioSanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu CD- ja LP-levy, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for daisy-audiobook vs generic audiobook
  {
    "name": "daisy-sanity",
    "description": "Daisy sanity check (validation only)",
    "function": performDaisySanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu yleinen ja Daisy-\xE4\xE4nikirja, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for DVD vs Blu-ray video discs
  {
    "name": "dvd-blueray-sanity",
    "description": "DVD vs Blu-Ray sanity check (validation only)",
    "function": performDvdSanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu DVD- ja Bluray-levy, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for mismatching ISBN qualifiers
  {
    "name": "isbn-qualifier",
    "description": "ISBN qualifier sanity check (validation only)",
    "function": performIsbnQualifierCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on eroava ISBN-tarkenne (020), tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, part of a multipart monograph vs whole set of multipart monographs
  {
    "name": "parts-sets",
    "description": "Parts vs sets test (validation)",
    "function": compareRecordsPartSetFeatures,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu yksitt\xE4inen moniosaisen monografian osa ja moniosainen monografia kokonaisuutena, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - fail merge, if languages in records differ too much
  {
    "name": "language",
    "description": "Language (validation)",
    "function": getCheckFeature({ featureName: "language" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, kielitiedot eroavat (008, 041)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if ISBNs differ too much
  {
    "name": "ISBN",
    "description": "ISBN (validation)",
    "function": getCheckFeature({ featureName: "isbn" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava ISBN (020)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if ISSNs differ too much
  {
    "name": "ISSN",
    "description": "ISSN (validation)",
    "function": getCheckFeature({ featureName: "issn" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava ISSN (022)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if other standard identifiers (024) differ too much
  {
    "name": "otherStandardIdentifier",
    "description": "otherStandardIdentifier (validation)",
    "function": getCheckFeature({ featureName: "otherStandardIdentifier" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava muu standarditunniste (024)"
  }
  // Disabled:
  // {'name': 'allMatchingFeatures', 'description': 'allMatchingFeatures (validation)', 'function': checkAllFeatures, 'validation': true, 'preference': false, 'internal': true, 'import': false, 'manual': 'warning', 'preference_message_fi': '', 'validation_message_fi': 'foobar'},
];

/**
 * Comparison tasks grouped by use case. Both lists keep the order of comparisonTasks.
 *
 * - recordImport: every task whose "import" flag is not false
 * - humanMerge:   every task whose "internal" flag and "manual" flag are both not false
 */
export const comparisonTasksTable = {
  recordImport: comparisonTasks.filter(isUsableForImport),
  // merge two records existing in database together, checked by human user in UI
  humanMerge: comparisonTasks.filter(isUsableForInternal).filter(isUsableForManual)
};

// Dump the resulting task lists once at module load for development debugging.
debugDev(`------------ RECORD IMPORT --------`);
debugDev(`comparisonTasksTable.recordImport has ${comparisonTasksTable.recordImport.length} comparison tasks:`);
debugDev(`${comparisonTasksTable.recordImport.map((task) => task.description).join("\n")}`);
debugDev(`------------ HUMAN MERGE --------`);
debugDev(`comparisonTasksTable.humanMerge has ${comparisonTasksTable.humanMerge.length} comparison tasks:`);
debugDev(`${comparisonTasksTable.humanMerge.map((task) => task.description).join("\n")}`);
//# sourceMappingURL=comparisonTasks.js.map