UNPKG

@natlibfi/melinda-record-match-validator

Version:

Validates if two records matched by melinda-record-matching can be merged and sets merge priority

83 lines (80 loc) 3.27 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.getExtentType = getExtentType; exports.getExtentsForPartsAndSets = getExtentsForPartsAndSets; exports.parseExtentString = parseExtentString; var _collectUtils = require("./collectFunctions/collectUtils"); var _debug = _interopRequireDefault(require("debug")); function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; } const debug = (0, _debug.default)('@natlibfi/melinda-record-match-validator:partsAndSets:test'); const debugData = debug.extend('data'); function getExtentsForPartsAndSets(record) { const f300s = (0, _collectUtils.hasFields)('300', record, f300ToJSON); debug('Field 300 info: %o', f300s); return f300s; function f300ToJSON(field) { // Note: $a is repeatable, this should fetch all $a subfields // Repeated $a:s are pretty rare, though const extentString = (0, _collectUtils.getSubfield)(field, 'a'); debugData(`f300 $a: ${extentString}`); const extentArray = parseExtentString(extentString); const type = getExtentType(extentArray); const extent = { type, string: extentString, array: extentArray, additionalExtent: undefined }; // we get the non-repeatable $e for additionla materials const additionalExtentString = (0, _collectUtils.getSubfield)(field, 'e'); debugData(`f300 $e: ${additionalExtentString}`); if (additionalExtentString && additionalExtentString !== 'undefined') { const additionalExtentArray = parseExtentString(additionalExtentString); const additionalType = getExtentType(additionalExtentArray); return { ...extent, additionalExtent: { string: additionalExtentString, array: additionalExtentArray, type: additionalType } }; } return extent; } } function getExtentType(extentArray) { debug(`Getting extentType from extentArray`); debugData(extentArray); const setTypeUnitsRegex = /vol|volumes|nidettä|osaa|band/iu; if (extentArray.some(extent => extent.amount > 1 && extent.unit.match(setTypeUnitsRegex))) { return 'set'; } return 'unknown'; } 
/**
 * Splits an extent string into "<amount> <unit>" clauses,
 * e.g. "2 vol", "248 pages", "1 verkkoaineisto".
 *
 * Punctuation is removed first with `stripPunc` from collectUtils.
 *
 * Open questions carried over from the original notes:
 *  - roman numerals as amounts are probably something we should handle as well
 *  - "X unit (Y unit2 Z unit3)" cases get no special treatment
 *
 * @param {string} extentString - raw extent text
 * @returns {Array<{amount: string, unit: string}>} clauses in order of appearance;
 *   `amount` is kept as the matched string
 */
function parseExtentString(extentString) {
  debug(`Handling extentString: |${extentString}|`);
  const punctlessString = (0, _collectUtils.stripPunc)(extentString);
  debug(`Removed punctuation: |${punctlessString}|`);

  // Unicode property escapes are used instead of \d and \w, since \w does not
  // cover letters such as ä and å.
  const regexpExtent = /(?<amount>\p{N}+) (?<unit>[\p{L}\p{N}-]+)/mgu;

  // Array.from consumes the match iterator one match at a time, so each clause
  // is logged as it is found.
  const foundExtents = Array.from(punctlessString.matchAll(regexpExtent), ({ groups }) => {
    debug(`amount: ${groups.amount} unit: ${groups.unit}`);
    return { amount: groups.amount, unit: groups.unit };
  });

  debugData(JSON.stringify(foundExtents));
  return foundExtents;
}
//# sourceMappingURL=partsAndSetsExtent.js.map