@natlibfi/melinda-record-match-validator
Version:
Validates if two records matched by melinda-record-matching can be merged and sets merge priority
83 lines (80 loc) • 3.27 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.getExtentType = getExtentType;
exports.getExtentsForPartsAndSets = getExtentsForPartsAndSets;
exports.parseExtentString = parseExtentString;
var _collectUtils = require("./collectFunctions/collectUtils");
var _debug = _interopRequireDefault(require("debug"));
/**
 * Normalises a required module so that `.default` can always be read from the result.
 *
 * @param {*} e - value returned by `require`
 * @returns {*} `e` itself when it is truthy and carries a truthy `__esModule` flag,
 *   otherwise a fresh wrapper object `{ default: e }`
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
// Base logger for this module, created from the `debug` package with the namespace string below.
const debug = (0, _debug.default)('@natlibfi/melinda-record-match-validator:partsAndSets:test');
// Child logger derived with debug.extend('data'); this file uses it for dumping the values being processed.
const debugData = debug.extend('data');
/**
 * Collects extent information from the 300 fields of a record.
 *
 * The record is handed to `hasFields('300', record, converter)`; the converter
 * turns one field into `{type, string, array, additionalExtent}`, built from
 * subfield $a and, when it has a usable value, subfield $e.
 *
 * @param {Object} record - record passed through to `hasFields`
 * @returns {*} the result of `hasFields` for the converted 300 fields
 */
function getExtentsForPartsAndSets(record) {
  // Converter for a single field 300.
  // Note: $a is repeatable, so ideally every $a would be read here;
  // repeated $a subfields are pretty rare, though.
  const f300ToJSON = field => {
    const extentString = (0, _collectUtils.getSubfield)(field, 'a');
    debugData(`f300 $a: ${extentString}`);
    const extentArray = parseExtentString(extentString);
    const baseExtent = {
      type: getExtentType(extentArray),
      string: extentString,
      array: extentArray,
      additionalExtent: undefined
    };

    // The non-repeatable $e carries the additional materials
    const additionalExtentString = (0, _collectUtils.getSubfield)(field, 'e');
    debugData(`f300 $e: ${additionalExtentString}`);
    const hasAdditionalExtent = Boolean(additionalExtentString) && additionalExtentString !== 'undefined';
    if (!hasAdditionalExtent) {
      return baseExtent;
    }

    const additionalExtentArray = parseExtentString(additionalExtentString);
    return {
      ...baseExtent,
      additionalExtent: {
        string: additionalExtentString,
        array: additionalExtentArray,
        type: getExtentType(additionalExtentArray)
      }
    };
  };

  const f300s = (0, _collectUtils.hasFields)('300', record, f300ToJSON);
  debug('Field 300 info: %o', f300s);
  return f300s;
}
/**
 * Classifies a parsed extent array.
 *
 * @param {Array<{amount: *, unit: string}>} extentArray - extent clauses to inspect
 * @returns {string} 'set' when at least one clause has an amount greater than 1
 *   and a unit matching the set-type unit pattern, otherwise 'unknown'
 */
function getExtentType(extentArray) {
  debug('Getting extentType from extentArray');
  debugData(extentArray);

  const setTypeUnitsRegex = /vol|volumes|nidettä|osaa|band/iu;

  // A clause counts towards a set only when it names more than one unit of a set-type unit
  const isMultiPartExtent = entry => {
    if (!(entry.amount > 1)) {
      return false;
    }
    return entry.unit.match(setTypeUnitsRegex) !== null;
  };

  return extentArray.some(isMultiPartExtent) ? 'set' : 'unknown';
}
/**
 * Parses an extent string into its "<amount> <unit>" clauses,
 * e.g. "2 vol", "248 pages", "1 verkkoaineisto".
 *
 * Known limitations:
 * - roman numerals are not recognised as amounts
 * - "X unit (Y unit2 Z unit3)" cases get no special handling
 *
 * @param {string} [extentString] - raw extent string; any non-string value
 *   (for example a missing subfield value) yields no extents
 * @returns {Array<{amount: string, unit: string}>} the clauses found, in order of appearance;
 *   amounts are kept as the matched strings
 */
function parseExtentString(extentString) {
  debug(`Handling extentString: |${extentString}|`);

  // Callers may hand over a missing subfield value. Without this guard the
  // punctuation stripping / matchAll below could throw on a non-string.
  if (typeof extentString !== 'string') {
    debug('No extent string to parse');
    return [];
  }

  const punctlessString = (0, _collectUtils.stripPunc)(extentString);
  debug(`Removed punctuation: |${punctlessString}|`);

  // \p{L} is used for the unit because \w would not match letters such as ä, å and ö
  const regexpExtent = /(?<amount>\p{N}+) (?<unit>[\p{L}\p{N}-]+)/mgu;

  const foundExtents = Array.from(punctlessString.matchAll(regexpExtent), ({ groups }) => {
    debug(`amount: ${groups.amount} unit: ${groups.unit}`);
    return {
      amount: groups.amount,
      unit: groups.unit
    };
  });

  debugData(JSON.stringify(foundExtents));
  return foundExtents;
}
//# sourceMappingURL=partsAndSetsExtent.js.map