@natlibfi/melinda-record-match-validator
Version:
Validates if two records matched by melinda-record-matching can be merged and sets merge priority
542 lines (541 loc) • 20.8 kB
JavaScript
import createDebugLogger from "debug";
import { isDeletedRecord, isTestRecord, isComponentRecord } from "@natlibfi/melinda-commons";
import { checkSID } from "./compareFunctions/compareFieldSID.js";
import { checkLOW, checkLOWinternal } from "./compareFunctions/compareFieldLOW.js";
import { checkCAT } from "./validators/fieldCAT.js";
import { check040b, check040e } from "./compareFunctions/compareField040.js";
import { checkAllTitleFeatures } from "./compareFunctions/compareTitle.js";
import { checkPublisher } from "./compareFunctions/compareField26X.js";
import { check042 } from "./compareFunctions/compareField042.js";
import { check336, check337, check338 } from "./compareFunctions/compareField33X.js";
import { check773, check773Internal } from "./validators/field773.js";
import { checkLeader, checkTypeOfRecord, checkRecordLevel, checkBibliographicLevel } from "./compareFunctions/compareLeader.js";
import { check005, check008 } from "./compareFunctions/compareControlFields.js";
import { compareRecordsPartSetFeatures } from "./validators/partsAndSets.js";
import { performAudioSanityCheck } from "./compareFunctions/sanityCheckAudio.js";
import { performDaisySanityCheck } from "./compareFunctions/sanityCheckDaisy.js";
import { performDvdSanityCheck } from "./compareFunctions/sanityCheckDvd.js";
import { performIsbnQualifierCheck } from "./compareFunctions/sanityCheckIsbnQualifer.js";
import { getCheckFeature } from "./validators/matchingFeatureChecks.js";
// Module-level logger created with the `debug` package under this module's namespace.
const debug = createDebugLogger("@natlibfi/melinda-record-match-validator:index");
// Logger derived from `debug` via extend("dev"); used below for the verbose
// task-selection output (presumably enabled separately from the base namespace — confirm).
const debugDev = debug.extend("dev");
/**
 * Validation check: both records must be present and neither may be deleted.
 *
 * A missing record is either `undefined` or `null`; both are rejected before
 * `isDeletedRecord` is called, so that helper never receives a non-record value.
 *
 * @param {Object} params
 * @param {Object} [params.record1] - first record of the pair
 * @param {Object} [params.record2] - second record of the pair
 * @returns {boolean} `false` when either record is missing or is reported as
 *   deleted by `isDeletedRecord`, otherwise `true`
 */
function checkExistence({ record1, record2 }) {
  // `== null` deliberately matches both undefined and null.
  if (record1 == null || record2 == null) {
    return false;
  }
  return !(isDeletedRecord(record1) || isDeletedRecord(record2));
}
/**
 * Validation check: the two records must agree on their test-record status.
 *
 * @param {Object} params
 * @param {Object} params.record1 - first record of the pair
 * @param {Object} params.record2 - second record of the pair
 * @returns {boolean} `true` when `isTestRecord` gives the same answer for both
 *   records, `false` when the answers differ
 */
function checkTestRecord({ record1, record2 }) {
  const firstIsTest = isTestRecord(record1);
  const secondIsTest = isTestRecord(record2);
  return firstIsTest === secondIsTest;
}
/**
 * Validation check: the two records must agree on whether they are component records.
 *
 * Both records are passed to `isComponentRecord` with the same extra arguments
 * (`false` and `["973"]`).
 *
 * @param {Object} params
 * @param {Object} params.record1 - first record of the pair
 * @param {Object} params.record2 - second record of the pair
 * @returns {boolean} `true` when `isComponentRecord` gives the same answer for
 *   both records, `false` when the answers differ
 */
function checkHostComponent({ record1, record2 }) {
  const [firstIsComponent, secondIsComponent] = [record1, record2].map(
    (record) => isComponentRecord(record, false, ["973"])
  );
  return firstIsComponent === secondIsComponent;
}
/**
 * Master list of record-pair comparison tasks.
 *
 * Each entry carries:
 * - `name` / `description`: short identifier and human-readable label
 *   (the description is what the debug listing of the selected tasks prints)
 * - `function`: the comparison function for the task
 * - `validation` / `preference`: booleans telling whether the task is a validation
 *   and/or a preference check (presumably read by the task runner — confirm there)
 * - `import` / `internal` / `manual`: optional usage flags read by isUsableForImport,
 *   isUsableForInternal and isUsableForManual; a task is dropped from a use case only
 *   when the flag is exactly `false`, so a missing flag means "use it".
 *   `manual` also carries "warning" / "error" / true (presumably the severity shown
 *   to a human user — confirm in the consumer)
 * - `validation_message_fi` / `preference_message_fi`: Finnish messages
 * - `tags` (first three entries only): record fields the check relates to
 *
 * NB! These are/should be in priority order for recordImport, which checks only until first failure!
 */
const comparisonTasks = [
  // undefined or deleted records cannot be merged (both automatic and human merge)
  {
    "name": "existence",
    "description": "existence (validation only)",
    "function": checkExistence,
    "validation": true,
    "preference": false,
    "preference_message_fi": "",
    "validation_message_fi": "poistettuja tietueita ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "STA" }, { "tag": "DEL" }, { "tag": "LDR", "chars": ["5"] }]
  },
  // test records and non test records should not be merged
  {
    "name": "test record",
    "description": "test record",
    "function": checkTestRecord,
    "validation": true,
    "preference": false,
    "import": true,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "testitietuetta ja normaalia tietuetta ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "STA" }]
  },
  // host and component records should not be merged
  {
    "name": "host/component",
    "description": "host/component record",
    "function": checkHostComponent,
    "validation": true,
    "preference": false,
    "import": true,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "osakohdetta ja ei-osakohdetta ei voi yhdist\xE4\xE4",
    "tags": [{ "tag": "773" }, { "tag": "973" }, { "tag": "LDR", "chars": ["7"] }]
  },
  // checks record type LDR/06 && bibliographic level LDR/07 (validation) and LDR/17 for encoding level (preference)
  // - fail merge if LDR/06-07 mismatch
  // - preference based on encoding level and more nuanced prepublication level for prepub records
  // Prioritize LDR/17 (encoding level)
  {
    "name": "leader",
    "description": "leader (validation and preference)",
    "function": checkLeader,
    "validation": true,
    "preference": true,
    "manual": false,
    "import": true,
    "internal": true,
    "validation_message_fi": "aineistotyypilt\xE4\xE4n tai bibliografiselta (LDR/06-07) tasoltaan eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": "suosi koodaus- ja ennakkotietotasoltaan (LDR/17) parempaa tietuetta"
  },
  // Singular leader comparisons for human/internal merge
  // leader typeOfRecord LDR/06
  // do not use at the same time as checkLeader, which checks all three leader values
  // we need to error these in MergeUI as mergeReducers refuse to handle records with differing LDR/06
  {
    "name": "typeOfRecord",
    "description": "leader: typeOfRecord (validation)",
    "function": checkTypeOfRecord,
    "validation": true,
    "preference": false,
    //'manual': 'warning',
    "manual": "error",
    "import": false,
    "internal": true,
    "validation_message_fi": "aineistotyypilt\xE4\xE4n (LDR/06) eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": ""
  },
  // leader bibliographicLevel LDR/07
  // do not use at the same time as checkLeader, which checks all three leader values
  // Currently not in use (import and internal are both false), we check components with a different check
  {
    "name": "bibliographicLevel",
    "description": "leader: bibliographicLevel (validation)",
    "function": checkBibliographicLevel,
    "validation": true,
    "preference": false,
    "import": false,
    "internal": false,
    "manual": "error",
    "validation_message_fi": "bibliografiselta tasoltaan (LDR/07) eroavia tietueita ei voi yhdist\xE4\xE4",
    "preference_message_fi": ""
  },
  // leader encodingLevel LDR/17 + f500/f594
  // do not use at the same time as checkLeader, which checks all three leader values
  {
    "name": "recordLevel",
    "description": "leader + 500/594: recordLevel (preference)",
    "function": checkRecordLevel,
    "validation": false,
    "preference": true,
    "import": false,
    "internal": true,
    "manual": "warning",
    "validation_message_fi": "",
    "preference_message_fi": "suosi koodaus- ja ennakkotietotasoltaan parempaa tietuetta (LDR/17)"
  },
  // just preference also for human merge (we like records with 264 instead of 260, they are probably more RDA-compatible)
  // Bit high on the preference list, isn't it?
  {
    "name": "RDA from publisher",
    "description": "publisher (264>260) (preference only)",
    "function": checkPublisher,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false,
    // let's not give too many preference warnings for a human user
    "preference_message_fi": "suosi tietuetta, jossa julkaisutiedot ovat kent\xE4ss\xE4 264",
    "validation_message_fi": ""
  },
  // what are we checking here? could probably be a warning for human merge
  // - fail merging online and direct-use electronic resources (008/23 or 008/29 form of item)
  // - fail merge if 008/06 type of date/publication status codes are a severe mismatch
  // - preference from 008/06 type of date/publication status codes
  // - gathers 008/39 cataloging source, but is anything done with it?
  {
    "name": "f008",
    "description": "008 test (validation and preference)",
    "function": check008,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jossa on tarkemmin ilmoitettu julkaisuajan tyyppi/julkaisun tila (008/06)",
    "validation_message_fi": "tietueita, joissa on ristiriitainen julkaisuajan tyyppi/julkaisun tila (008/06) ei voi yhdist\xE4\xE4"
  },
  // This check is used only for preference (validation is false)
  // Priority order: FIKKA > ANY > NONE
  {
    "name": "LOW-for-preference",
    "description": "LOW test (preference)",
    "function": checkLOW,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jossa on Kansalliskirjaston tietokantatunnus (LOW) (tai tietuetta, jossa ylip\xE4\xE4ns\xE4 on joku tietokantatunnus)",
    "validation_message_fi": ""
  },
  // database internal merge cannot merge two records with same LOW
  {
    "name": "LOW-validation-for-internal",
    "description": "LOW test (validation for internal)",
    "function": checkLOWinternal,
    "validation": true,
    "preference": false,
    "import": false,
    "internal": true,
    "manual": "error",
    "preference_message_fi": "",
    "validation_message_fi": "tietueita, joissa on saman paikalliskannan tietokantatunnus (LOW), ei voi yhdist\xE4\xE4"
  },
  // This check uses 042 for preference
  {
    "name": "f042-authentication-code",
    "description": "field 042: authentication code (preference only)",
    "function": check042,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jossa on Kansallisbibliografian tai Kansallisdiskografian autentikaatiokoodi (042)",
    "validation_message_fi": ""
  },
  {
    "name": "CAT",
    "description": "CAT test (preference only)",
    "function": checkCAT,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false,
    // let's not give too many preference warnings for a human cataloger
    "preference_message_fi": "suosi tietuetta, jolla on paremmat kuvailuhistoriatiedot",
    "validation_message_fi": ""
  },
  // NB! I'd like to have a test for 008/06, but the specs for it are elusive?
  {
    "name": "title",
    "description": "field 245 (title)",
    "function": checkAllTitleFeatures,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden nimeketiedot (245) eroavat"
  },
  // Do not use the old f245 check at the same time as checkAllTitleFeatures
  //{'name': 'title-old', 'description': 'field 245 (title)', 'function': check245, 'validation': true, 'preference': false, 'manual': 'warning'},
  // human merge: warning
  {
    "name": "f336",
    "description": "field 336 (content type) test (validation and preference)",
    "function": check336,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat sis\xE4lt\xF6tyyppitiedot (336)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden sis\xE4lt\xF6tyyppitiedot (336) eroavat"
  },
  // human merge: warning
  {
    "name": "f337",
    "description": "field 337 (media type) test (validation and preference)",
    "function": check337,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat mediatyyppitiedot (337)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden mediatyyppitiedot (337) eroavat"
  },
  // human merge: warning
  {
    "name": "f338",
    "description": "field 338 (carrier type) test (validation and preference)",
    "function": check338,
    "validation": true,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "suosi tietuetta, jolla on tarkemmat tallennetyyppitiedot (338)",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, niiden tallennetyyppitiedot (338) eroavat"
  },
  // human merge: warning for subfields q&g - $w actually should be different ...
  {
    "name": "f773-for-internal",
    "description": "773 $wgq test (validation only)",
    "function": check773Internal,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, osakohteen sijaintitiedot eroavat (773)"
  },
  {
    "name": "f773-for-import",
    "description": "773 $wgq test (validation only)",
    "function": check773,
    "validation": true,
    "preference": false,
    "internal": false,
    "import": true,
    "manual": false,
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, osakohteen sijaintitiedot eroavat (773)"
  },
  {
    "name": "f040b",
    "description": "040$b (language of cataloging) (preference only)",
    "function": check040b,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jolla on soveltuvin kuvailukieli (040)",
    "validation_message_fi": ""
  },
  {
    "name": "f040e",
    "description": "040$e (description conventions) (preference only)",
    "function": check040e,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": true,
    "preference_message_fi": "suosi tietuetta, jonka kuvailus\xE4\xE4nn\xF6iksi on merkitty RDA (040)",
    "validation_message_fi": ""
  },
  // SID for import (do not use for manual database internal merge)
  // - fail merge for different SIDs from same database
  // - set preference for the record that has more SIDs (cf. the preference message)
  {
    "name": "fSID-for-import",
    "description": "SID test (validation and preference), for import only",
    "function": checkSID,
    "validation": true,
    "preference": true,
    "internal": false,
    "import": true,
    "manual": false,
    "preference_message_fi": "suosi tietuetta, jolla on enemm\xE4n linkkej\xE4 vastintietueisiin paikalliskannoissa",
    "validation_message_fi": "tietueita, joilla on samassa paikalliskannassa eri vastintietue ei voi yhdist\xE4\xE4 (SID)"
  },
  // preference for record that's updated more recently
  {
    "name": "f005",
    "description": "005 timestamp test (preference)",
    "function": check005,
    "validation": false,
    "preference": true,
    "internal": true,
    "import": true,
    "manual": false,
    // let's not give too many preference warnings for a human cataloger
    "preference_message_fi": "suosi tietuetta, jota on p\xE4ivitetty viimeksi",
    "validation_message_fi": ""
  },
  // human merge: warning
  // - fail merge, for CD vs LP record
  {
    "name": "audio-sanity",
    "description": "audio sanity check (validation only)",
    "function": performAudioSanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu CD- ja LP-levy, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for daisy-audiobook vs generic audiobook
  {
    "name": "daisy-sanity",
    "description": "Daisy sanity check (validation only)",
    "function": performDaisySanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu yleinen ja Daisy-\xE4\xE4nikirja, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for DVD vs Blu-ray video discs
  {
    "name": "dvd-blueray-sanity",
    "description": "DVD vs Blu-Ray sanity check (validation only)",
    "function": performDvdSanityCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu DVD- ja Bluray-levy, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, for mismatching ISBN qualifiers
  {
    "name": "isbn-qualifier",
    "description": "ISBN qualifier sanity check (validation only)",
    "function": performIsbnQualifierCheck,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on eroava ISBN-tarkenne (020), tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // - fail merge, part of a multipart monograph vs whole set of multipart monographs
  {
    "name": "parts-sets",
    "description": "Parts vs sets test (validation)",
    "function": compareRecordsPartSetFeatures,
    "validation": true,
    "preference": false,
    "internal": true,
    "import": true,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tietueissa on kuvailtu yksitt\xE4inen moniosaisen monografian osa ja moniosainen monografia kokonaisuutena, tarkista voiko ne yhdist\xE4\xE4"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - fail merge, if languages in records differ too much
  {
    "name": "language",
    "description": "Language (validation)",
    "function": getCheckFeature({ featureName: "language" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4, kielitiedot eroavat (008, 041)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if ISBNs differ too much
  {
    "name": "ISBN",
    "description": "ISBN (validation)",
    "function": getCheckFeature({ featureName: "isbn" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava ISBN (020)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if ISSNs differ too much
  {
    "name": "ISSN",
    "description": "ISSN (validation)",
    "function": getCheckFeature({ featureName: "issn" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava ISSN (022)"
  },
  // human merge: warning
  // import: do not use, this is done in matcher
  // - warn if other standard identifiers (024) differ too much
  {
    "name": "otherStandardIdentifier",
    "description": "otherStandardIdentifier (validation)",
    "function": getCheckFeature({ featureName: "otherStandardIdentifier" }),
    "validation": true,
    "preference": false,
    "internal": true,
    "import": false,
    "manual": "warning",
    "preference_message_fi": "",
    "validation_message_fi": "tarkista voiko tietueet yhdist\xE4\xE4: eroava muu standarditunniste (024)"
  }
  /*
  {'name': 'allMatchingFeatures',
  'description': 'allMatchingFeatures (validation)',
  'function': checkAllFeatures,
  'validation': true,
  'preference': false,
  'internal': true,
  'import': false,
  'manual': 'warning',
  'preference_message_fi': '',
  'validation_message_fi': 'foobar'},
  */
];
/**
 * Comparison tasks grouped by use case.
 *
 * Array.prototype.filter already returns a fresh array, so each list is
 * independent of `comparisonTasks` without any extra copying.
 */
export const comparisonTasksTable = {
  // tasks whose `import` flag is not explicitly false
  recordImport: comparisonTasks.filter(isUsableForImport),
  // merge two records existing in database together, checked by human user in UI:
  // tasks whose `internal` flag and `manual` flag are both not explicitly false
  humanMerge: comparisonTasks
    .filter(isUsableForInternal)
    .filter(isUsableForManual)
};
// On module load, list the tasks selected for each use case on the dev logger:
// a heading, the number of tasks, and the task descriptions one per line.
for (const [heading, tableKey] of [["RECORD IMPORT", "recordImport"], ["HUMAN MERGE", "humanMerge"]]) {
  const selectedTasks = comparisonTasksTable[tableKey];
  debugDev(`------------ ${heading} --------`);
  debugDev(`comparisonTasksTable.${tableKey} has ${selectedTasks.length} comparison tasks:`);
  debugDev(selectedTasks.map(({ description }) => description).join("\n"));
}
/**
 * Filter predicate: is the task usable in manual (human) merge?
 *
 * Logs the task's `manual` value on the dev logger, then rejects the task only
 * when `manual` is exactly `false`; a missing flag or any other value keeps it.
 *
 * @param {Object} task - comparison task
 * @returns {boolean} `false` only when `task.manual === false`
 */
function isUsableForManual(task) {
  const { name, manual } = task;
  debugDev(`${name} has manual: ${manual}`);
  return manual !== false;
}
/**
 * Filter predicate: is the task usable when merging records inside the database?
 *
 * Logs the task's `internal` value on the dev logger, then rejects the task only
 * when `internal` is exactly `false`; a missing flag or any other value keeps it.
 *
 * @param {Object} task - comparison task
 * @returns {boolean} `false` only when `task.internal === false`
 */
function isUsableForInternal(task) {
  const { name, internal } = task;
  debugDev(`${name} has internal: ${internal}`);
  return internal !== false;
}
/**
 * Filter predicate: is the task usable in record import?
 *
 * Logs the task's `import` value on the dev logger, then rejects the task only
 * when `import` is exactly `false`; a missing flag or any other value keeps it.
 *
 * @param {Object} task - comparison task
 * @returns {boolean} `false` only when `task.import === false`
 */
function isUsableForImport(task) {
  // `import` is a reserved word, so the flag is read under a different local name.
  const { name, import: importFlag } = task;
  debugDev(`${name} has import: ${importFlag}`);
  return importFlag !== false;
}
//# sourceMappingURL=comparisonTasks.js.map