@natlibfi/melinda-record-match-validator
Version:
Validates if two records matched by melinda-record-matching can be merged and sets merge priority
164 lines (143 loc) • 6.08 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.checkPartSetFeatures = checkPartSetFeatures;
exports.compareRecordsPartSetFeatures = compareRecordsPartSetFeatures;
exports.getPartSetFeatures = getPartSetFeatures;
exports.getTitleFeaturesType = getTitleFeaturesType;
exports.getTitleForPartsAndSets = getTitleForPartsAndSets;
var _title = require("./title");
var _partsAndSetsExtent = require("./partsAndSetsExtent");
var _debug = _interopRequireDefault(require("debug"));
/**
 * Interop helper for required modules.
 *
 * A truthy value flagged with `__esModule` is returned untouched; anything else
 * (including falsy values) is wrapped so that it is reachable as `.default`.
 *
 * @param {*} e - value returned by `require`
 * @returns {*} `e` itself, or `{ default: e }`
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
// Base logger for this module, created from the `debug` package with a package-scoped namespace.
const debug = (0, _debug.default)('@natlibfi/melinda-record-match-validator:partsAndSets');
// Logger derived with extend('dev'); used in this file for messages that trace which decision branch was taken.
const debugDev = debug.extend('dev');
// Logger derived with extend('data'); used in this file for dumping extracted features and inputs.
const debugData = debug.extend('data');
// This validator checks (or tries to check) that a record that describes a part of a set and a record
// that describes the whole set won't be considered a valid match.
// We could also have functionality for checking that records describing different parts of a set would
// not be considered a valid match.
// Use cases:
// * multi-part monographs described as parts or as whole sets
// * possibly also different sets of mixed materials
// Extract partSetFeatures from a record
/**
 * Collect the parts-and-sets features of a record and classify the record.
 *
 * Features currently come from two places:
 *  - f300 (extent), through getExtentsForPartsAndSets
 *  - f245 part subfields, through getTitleForPartsAndSets
 *
 * Classification over the `type` of every collected feature:
 *  - at least one 'set' feature              -> 'set' (also when 'part' features exist)
 *  - at least one 'part' and no 'set' feature -> 'part'
 *  - neither 'set' nor 'part' features        -> 'unknown'
 *
 * Feature sources that are not implemented yet:
 *  - Standard identifiers and their qualifiers
 *     - two ISBNs qualified 'Part 1' and 'Part 2': a record for a set
 *     - a single ISBN qualified 'set': a record for a set
 *     - two ISBNs qualified 'Part 2' and 'set': a record for a part
 *  - Amount of standard identifiers
 *     - several ISBNs might mean a set (ignoring ISBN10/ISBN13 pairs)
 *  - Note fields 500/515
 *     - a note like 'ISBN for complete set' probably means a record for a part
 *       (020 then has the ISBN of the part)
 *  - Fields with subfield $3
 *     - values like 'Part 1', 'Part 2' probably mean a record for a set
 *
 * @param {Object} record - record handed as-is to both feature extractors
 * @returns {{type: string, details: {extentsForPartsAndSets: *, titleForPartsAndSets: *}}}
 *   the resolved type plus the raw features it was resolved from
 */
function getPartSetFeatures(record) {
  const extentsForPartsAndSets = (0, _partsAndSetsExtent.getExtentsForPartsAndSets)(record);
  debugData(`Extents: <${JSON.stringify(extentsForPartsAndSets)}>`);

  const titleForPartsAndSets = getTitleForPartsAndSets(record);
  debugData(`Title: <${JSON.stringify(titleForPartsAndSets)}>`);

  // Title type first, then one type per extent
  const extentTypes = extentsForPartsAndSets.map(extent => extent.type);
  const featureTypes = [titleForPartsAndSets.type, ...extentTypes];
  debugData(featureTypes);

  // A set-type feature always wins; part-type only counts when no set-type feature exists
  const resolveType = () => {
    if (featureTypes.includes('set')) {
      return 'set';
    }
    if (featureTypes.includes('part')) {
      return 'part';
    }
    return 'unknown';
  };

  return {
    type: resolveType(),
    details: {
      extentsForPartsAndSets,
      titleForPartsAndSets
    }
  };
}
/**
 * Build the title-based parts-and-sets feature of a record.
 *
 * The title features extracted by getTitleFeatures are copied into a new object
 * and complemented with a `type` computed by getTitleFeaturesType. The computed
 * `type` is written last, so it replaces any `type` key the features carried.
 *
 * Open question: both $n (number of part) and $p (name of part) are repeatable
 * subfields - do we get all of the instances?
 *
 * @param {Object} record - record handed as-is to getTitleFeatures
 * @returns {Object} the title features plus `type`
 */
function getTitleForPartsAndSets(record) {
  const features = (0, _title.getTitleFeatures)(record);
  const type = getTitleFeaturesType(features);
  debugDev(`FeaturesType: ${type}`);
  return { ...features, type };
}
/**
 * Guess from title features whether the title belongs to a single part.
 *
 * Reads the lists `numbersOfPartInSectionOfAWork` (subfield $n) and
 * `namesOfPartInSectionOfAWork` (subfield $p). This function never reports
 * 'set'; it only tells 'part' from 'unknown'.
 *
 * @param {Object|null|undefined} title - title features; missing features
 *   (undefined or null) and missing lists are treated as "no part information"
 * @returns {string} 'part' or 'unknown'
 */
function getTitleFeaturesType(title) {
  debugData(title);
  // No title features at all: nothing to base a guess on
  if (title === undefined || title === null) {
    return 'unknown';
  }
  // A missing list counts as an empty one instead of throwing on `.length`
  const {
    namesOfPartInSectionOfAWork = [],
    numbersOfPartInSectionOfAWork = []
  } = title;
  if (namesOfPartInSectionOfAWork.length < 1 && numbersOfPartInSectionOfAWork.length < 1) {
    return 'unknown';
  }
  // If we have exactly one subfield $n and it does not have `1-2` type of content we can assume part
  // Note: we can have a case where we have a set of subparts that contain a part ...
  if (numbersOfPartInSectionOfAWork.length === 1) {
    const partNumber = numbersOfPartInSectionOfAWork[0];
    debugDev(`We have a number: ${partNumber}`);
    // A numeric range means several parts: give up, even if a part name exists
    if (/\d+-\d+/u.test(partNumber)) {
      debugDev(`But number is of several parts: ${partNumber}`);
      return 'unknown';
    }
    return 'part';
  }
  // If we have exactly one subfield $p we can assume part
  if (namesOfPartInSectionOfAWork.length === 1) {
    debugDev(`We have a name: ${namesOfPartInSectionOfAWork[0]}`);
    return 'part';
  }
  // We could also make guesses about numbers / roman numerals in the actual title subfields $a and $b
  return 'unknown';
}
// Compare two records by their partSetFeatures
/**
 * Compare two records by their parts-and-sets features.
 *
 * Features are extracted with getPartSetFeatures (record1 first, then record2)
 * and the pair is handed to checkPartSetFeatures, whose result is returned.
 *
 * @param {Object} records
 * @param {Object} records.record1 - first record
 * @param {Object} records.record2 - second record
 * @returns {*} whatever checkPartSetFeatures returns for the two feature sets
 */
function compareRecordsPartSetFeatures({ record1, record2 }) {
  return checkPartSetFeatures({
    partSetFeatures1: getPartSetFeatures(record1),
    partSetFeatures2: getPartSetFeatures(record2)
  });
}
// Check two sets of partSetFeatures
/**
 * Decide whether two sets of parts-and-sets features are compatible.
 *
 * The pair is compatible when either type is 'unknown' (nothing speaks against
 * the match) or when both types are equal. Any other combination - in practice
 * 'set' against 'part' - is incompatible.
 *
 * @param {Object} features
 * @param {{type: string}} features.partSetFeatures1 - features of the first record
 * @param {{type: string}} features.partSetFeatures2 - features of the second record
 * @returns {boolean} true when the types do not contradict each other
 */
function checkPartSetFeatures({ partSetFeatures1, partSetFeatures2 }) {
  debugData(JSON.stringify(partSetFeatures1));
  debugData(JSON.stringify(partSetFeatures2));

  const type1 = partSetFeatures1.type;
  const type2 = partSetFeatures2.type;

  // An unknown type on either side cannot contradict the other side
  if (type1 === 'unknown' || type2 === 'unknown') {
    return true;
  }
  return type1 === type2;
}
//# sourceMappingURL=partsAndSets.js.map