/*
 * @ezs/basics
 * Version:
 * Basics statements for EZS
 * 82 lines (79 loc) • 2.38 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.default = void 0;
var _lodash = require("lodash");
// Letters treated as a possible initial when followed by a period (e.g. "J." in "J. Bond").
const UPPER_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
// Every sentence under construction starts with this single space, so that an
// initial standing at the very beginning of a sentence is preceded by a space too.
const SENTENCE_INIT = ' ';
// Characters that may terminate a sentence.
const SENTENCE_ENDING = '.?!';

/**
 * Segment sentences from `str` into an array.
 *
 * A sentence ends on `?`, `!` or `.`, except when the period directly follows
 * a single upper-case letter that is itself preceded by a space (an initial,
 * as in "J. Bond"). Leading whitespace is removed from every sentence and
 * segments made only of whitespace are dropped, so trailing blanks or
 * newlines never produce an empty sentence.
 *
 * @param {string} str
 * @returns {string[]}
 */
const segmentSentences = str => {
    const segments = [];
    let current = SENTENCE_INIT;

    // Array.from is kept (rather than iterating `str` directly) so that a
    // non-iterable truthy value such as a number still yields no sentence
    // instead of throwing.
    for (const character of Array.from(str)) {
        // The two characters that precede `character` in the current sentence.
        const [beforeLast, last] = current.slice(-2);
        current += character;

        if (!SENTENCE_ENDING.includes(character)) {
            continue;
        }

        // " J." is an initial, not the end of a sentence. `last` is undefined
        // when the sentence only holds its initial space.
        const followsInitial = character === '.'
            && beforeLast === ' '
            && last !== undefined
            && UPPER_LETTERS.includes(last);

        if (!followsInitial) {
            // Appending in place keeps the scan linear instead of copying
            // the whole sentence list for every character.
            segments.push(current);
            current = SENTENCE_INIT;
        }
    }
    segments.push(current);

    return segments
        .map(sentence => sentence.trimStart())
        .filter(sentence => sentence !== '');
};
/*
 * EZS statement: replace the field found at `path` with its list of sentences.
 *
 * - On the last call (`ctx.isLast()`), the feed is closed and nothing is written.
 * - `path` comes from the `path` parameter and defaults to `value`.
 * - When the field is an array, its string items are joined with a space
 *   (non-string items count as empty strings) before segmentation.
 * - A falsy field produces an empty array.
 * - The result is stored under the literal key `path` of a shallow copy of `data`.
 */
const TXTSentences = (data, feed, ctx) => {
    if (ctx.isLast()) {
        return feed.close();
    }

    const path = ctx.getParam('path', 'value');
    const value = (0, _lodash.get)(data, path);

    let text = value;
    if (Array.isArray(value)) {
        const onlyStrings = value.map(item => {
            if (typeof item === 'string') {
                return item;
            }
            return '';
        });
        text = onlyStrings.join(' ');
    }

    let sentences = [];
    if (text) {
        sentences = segmentSentences(text);
    }

    const output = { ...data, [path]: sentences };
    feed.write(output);
    return feed.end();
};
/**
* Take a `String` and split it into an array of sentences.
*
* Input:
*
* ```json
* { "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." }
* ```
*
* Output:
*
* ```json
* { "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] }
* ```
*
* @name TXTSentences
* @param {String} [path="value"] path of the field to segment
* @returns {String[]}
* @deprecated
* see https://inist-cnrs.github.io/ezs/#/plugin-strings?id=sentences
*/
// Default export of the module: an object exposing the TXTSentences statement.
const statements = {
    TXTSentences: TXTSentences
};
exports.default = statements;