string-punctuation-tokenizer
Version:
Small library that provides functions to tokenize a string into an array of words with or without punctuation
82 lines (67 loc) • 6.79 kB
JavaScript
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.occurrencesInString = exports.occurrenceInString = exports.occurrencesInTokens = exports.occurrenceInTokens = void 0;
var _tokenizers = require("./tokenizers");
/**
* Gets the occurrence of a subString in a string by using the subString index in the string.
* @param {Array} tokens
* @param {Number} currentWordIndex
* @param {String} subString
* @return {Object}
*/
var occurrenceInTokens = function occurrenceInTokens(tokens, currentWordIndex, subString) {
var occurrence = 0;
for (var i = 0; i <= currentWordIndex; i++) {
if (tokens[i].token === subString) occurrence++;
}
return occurrence;
};
/**
* Function that count occurrences of a substring in a string
* @param {Array} tokens - The string to search in
* @param {String} subString - The sub string to search for
* @return {Integer} - the count of the occurrences
*/
exports.occurrenceInTokens = occurrenceInTokens;
var occurrencesInTokens = function occurrencesInTokens(tokens, subString) {
var occurrences = 0;
tokens.forEach(function (token) {
if (token && token.token === subString) occurrences++;
});
return occurrences;
};
/**
* Gets the occurrence of a subString in a string by using the subString index in the string.
* @param {String} text
* @param {Number} currentWordIndex
* @param {String} subString
* @return {Object}
*/
exports.occurrencesInTokens = occurrencesInTokens;
var occurrenceInString = function occurrenceInString(text, currentWordIndex, subString) {
var tokens = (0, _tokenizers.tokenize)({
text: text,
verbose: true
});
var occurrence = occurrenceInTokens(tokens, currentWordIndex, subString);
return occurrence;
};
/**
* Function that count occurrences of a substring in a string
* @param {String} text - The string to search in
* @param {String} subString - The sub string to search for
* @return {Integer} - the count of the occurrences
*/
exports.occurrenceInString = occurrenceInString;
var occurrencesInString = function occurrencesInString(text, subString) {
var tokens = (0, _tokenizers.tokenize)({
text: text,
verbose: true
});
var occurrences = occurrencesInTokens(tokens, subString);
return occurrences;
};
exports.occurrencesInString = occurrencesInString;
//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJzb3VyY2VzIjpbIi4uL3NyYy9vY2N1cnJlbmNlcy5qcyJdLCJuYW1lcyI6WyJvY2N1cnJlbmNlSW5Ub2tlbnMiLCJ0b2tlbnMiLCJjdXJyZW50V29yZEluZGV4Iiwic3ViU3RyaW5nIiwib2NjdXJyZW5jZSIsImkiLCJ0b2tlbiIsIm9jY3VycmVuY2VzSW5Ub2tlbnMiLCJvY2N1cnJlbmNlcyIsImZvckVhY2giLCJvY2N1cnJlbmNlSW5TdHJpbmciLCJ0ZXh0IiwidmVyYm9zZSIsIm9jY3VycmVuY2VzSW5TdHJpbmciXSwibWFwcGluZ3MiOiI7Ozs7Ozs7QUFBQTs7QUFFQTs7Ozs7OztBQU9PLElBQU1BLGtCQUFrQixHQUFHLFNBQXJCQSxrQkFBcUIsQ0FDaENDLE1BRGdDLEVBRWhDQyxnQkFGZ0MsRUFHaENDLFNBSGdDLEVBSTdCO0FBQ0gsTUFBSUMsVUFBVSxHQUFHLENBQWpCOztBQUNBLE9BQUssSUFBSUMsQ0FBQyxHQUFHLENBQWIsRUFBZ0JBLENBQUMsSUFBSUgsZ0JBQXJCLEVBQXVDRyxDQUFDLEVBQXhDLEVBQTRDO0FBQzFDLFFBQUlKLE1BQU0sQ0FBQ0ksQ0FBRCxDQUFOLENBQVVDLEtBQVYsS0FBb0JILFNBQXhCLEVBQW1DQyxVQUFVO0FBQzlDOztBQUNELFNBQU9BLFVBQVA7QUFDRCxDQVZNO0FBWVA7Ozs7Ozs7Ozs7QUFNTyxJQUFNRyxtQkFBbUIsR0FBRyxTQUF0QkEsbUJBQXNCLENBQ2pDTixNQURpQyxFQUVqQ0UsU0FGaUMsRUFHOUI7QUFDSCxNQUFJSyxXQUFXLEdBQUcsQ0FBbEI7QUFDQVAsRUFBQUEsTUFBTSxDQUFDUSxPQUFQLENBQWUsVUFBQ0gsS0FBRCxFQUFXO0FBQ3hCLFFBQUlBLEtBQUssSUFBSUEsS0FBSyxDQUFDQSxLQUFOLEtBQWdCSCxTQUE3QixFQUF3Q0ssV0FBVztBQUNwRCxHQUZEO0FBR0EsU0FBT0EsV0FBUDtBQUNELENBVE07QUFXUDs7Ozs7Ozs7Ozs7QUFPTyxJQUFNRSxrQkFBa0IsR0FBRyxTQUFyQkEsa0JBQXFCLENBQ2hDQyxJQURnQyxFQUVoQ1QsZ0JBRmdDLEVBR2hDQyxTQUhnQyxFQUk3QjtBQUNILE1BQU1GLE1BQU0sR0FBRywwQkFBUztBQUFDVSxJQUFBQSxJQUFJLEVBQUpBLElBQUQ7QUFBT0MsSUFBQUEsT0FBTyxFQUFFO0FBQWhCLEdBQVQsQ0FBZjtBQUNBLE1BQU1SLFVBQVUsR0FBR0osa0JBQWtCLENBQUNDLE1BQUQsRUFBU0MsZ0JBQVQsRUFBMkJDLFNBQTNCLENBQXJDO0FBQ0EsU0FBT0MsVUFBUDtBQUNELENBUk07QUFVUDs7Ozs7Ozs7OztBQU1PLElBQU1TLG1CQUFtQixHQUFHLFNBQXRCQSxtQkFBc0IsQ0FDakNGLElBRGlDLEVBRWpDUixTQUZpQyxFQUc5QjtBQUNILE1BQU1GLE1BQU0sR0FBRywwQkFBUztBQUFDVSxJQUFBQSxJQUFJLEVBQUpBLElBQUQ7QUFBT0MsSUFBQUEsT0FBTyxFQUFFO0FBQWhCLEdBQVQsQ0FBZjtBQUNBLE1BQU1KLFdBQVcsR0FBR0QsbUJBQW1CLENBQUNOLE1BQUQsRUFBU0UsU0FBVCxDQUF2QztBQUNBLFNBQU9LLFdBQVA7QUFDRCxDQVBNIiwic291cmNlc0NvbnRlbnQiOlsiaW1wb3J0IHt0b2tlbml6ZX0gZnJvbSAnLi90b2tlbml6ZXJzJztcblxuLyoqXG4gKiBHZXRzIHRoZSBvY2N1cnJlbmNlIG9mIGEgc3ViU3RyaW5nIGluIGEgc3RyaW5nIGJ5IHVzaW5nIHRoZSBzdWJTdHJpbmcgaW5kZXggaW4gdGhlIHN0cmluZy5cbiAqIEBwYXJhbSB7QXJyYXl9IHRva2Vuc1xuICogQHBhcmFtIHtOdW1iZXJ9IGN1cnJlbnRXb3JkSW5kZXhcbiAqIEBwYXJhbSB7U3RyaW5nfSBzdWJTdHJpbmdcbiAqIEByZXR1cm4ge09iamVjdH1cbiAqL1xuZXhwb3J0IGNvbnN0IG9jY3VycmVuY2VJblRva2VucyA9IChcbiAgdG9rZW5zLFxuICBjdXJyZW50V29yZEluZGV4LFxuICBzdWJTdHJpbmcsXG4pID0+IHtcbiAgbGV0IG9jY3VycmVuY2UgPSAwO1xuICBmb3IgKGxldCBpID0gMDsgaSA8PSBjdXJyZW50V29yZEluZGV4OyBpKyspIHtcbiAgICBpZiAodG9rZW5zW2ldLnRva2VuID09PSBzdWJTdHJpbmcpIG9jY3VycmVuY2UgKys7XG4gIH1cbiAgcmV0dXJuIG9jY3VycmVuY2U7XG59O1xuXG4vKipcbiAqIEZ1bmN0aW9uIHRoYXQgY291bnQgb2NjdXJyZW5jZXMgb2YgYSBzdWJzdHJpbmcgaW4gYSBzdHJpbmdcbiAqIEBwYXJhbSB7QXJyYXl9IHRva2VucyAtIFRoZSBzdHJpbmcgdG8gc2VhcmNoIGluXG4gKiBAcGFyYW0ge1N0cmluZ30gc3ViU3RyaW5nIC0gVGhlIHN1YiBzdHJpbmcgdG8gc2VhcmNoIGZvclxuICogQHJldHVybiB7SW50ZWdlcn0gLSB0aGUgY291bnQgb2YgdGhlIG9jY3VycmVuY2VzXG4gKi9cbmV4cG9ydCBjb25zdCBvY2N1cnJlbmNlc0luVG9rZW5zID0gKFxuICB0b2tlbnMsXG4gIHN1YlN0cmluZyxcbikgPT4ge1xuICBsZXQgb2NjdXJyZW5jZXMgPSAwO1xuICB0b2tlbnMuZm9yRWFjaCgodG9rZW4pID0+IHtcbiAgICBpZiAodG9rZW4gJiYgdG9rZW4udG9rZW4gPT09IHN1YlN0cmluZykgb2NjdXJyZW5jZXMgKys7XG4gIH0pO1xuICByZXR1cm4gb2NjdXJyZW5jZXM7XG59O1xuXG4vKipcbiAqIEdldHMgdGhlIG9jY3VycmVuY2Ugb2YgYSBzdWJTdHJpbmcgaW4gYSBzdHJpbmcgYnkgdXNpbmcgdGhlIHN1YlN0cmluZyBpbmRleCBpbiB0aGUgc3RyaW5nLlxuICogQHBhcmFtIHtTdHJpbmd9IHRleHRcbiAqIEBwYXJhbSB7TnVtYmVyfSBjdXJyZW50V29yZEluZGV4XG4gKiBAcGFyYW0ge1N0cmluZ30gc3ViU3RyaW5nXG4gKiBAcmV0dXJuIHtPYmplY3R9XG4gKi9cbmV4cG9ydCBjb25zdCBvY2N1cnJlbmNlSW5TdHJpbmcgPSAoXG4gIHRleHQsXG4gIGN1cnJlbnRXb3JkSW5kZXgsXG4gIHN1YlN0cmluZyxcbikgPT4ge1xuICBjb25zdCB0b2tlbnMgPSB0b2tlbml6ZSh7dGV4dCwgdmVyYm9zZTogdHJ1ZX0pO1xuICBjb25zdCBvY2N1cnJlbmNlID0gb2NjdXJyZW5jZUluVG9rZW5zKHRva2VucywgY3VycmVudFdvcmRJbmRleCwgc3ViU3RyaW5nKTtcbiAgcmV0dXJuIG9jY3VycmVuY2U7XG59O1xuXG4vKipcbiAqIEZ1bmN0aW9uIHRoYXQgY291bnQgb2NjdXJyZW5jZXMgb2YgYSBzdWJzdHJpbmcgaW4gYSBzdHJpbmdcbiAqIEBwYXJhbSB7U3RyaW5nfSB0ZXh0IC0gVGhlIHN0cmluZyB0byBzZWFyY2ggaW5cbiAqIEBwYXJhbSB7U3RyaW5nfSBzdWJTdHJpbmcgLSBUaGUgc3ViIHN0cmluZyB0byBzZWFyY2ggZm9yXG4gKiBAcmV0dXJuIHtJbnRlZ2VyfSAtIHRoZSBjb3VudCBvZiB0aGUgb2NjdXJyZW5jZXNcbiAqL1xuZXhwb3J0IGNvbnN0IG9jY3VycmVuY2VzSW5TdHJpbmcgPSAoXG4gIHRleHQsXG4gIHN1YlN0cmluZyxcbikgPT4ge1xuICBjb25zdCB0b2tlbnMgPSB0b2tlbml6ZSh7dGV4dCwgdmVyYm9zZTogdHJ1ZX0pO1xuICBjb25zdCBvY2N1cnJlbmNlcyA9IG9jY3VycmVuY2VzSW5Ub2tlbnModG9rZW5zLCBzdWJTdHJpbmcpO1xuICByZXR1cm4gb2NjdXJyZW5jZXM7XG59O1xuIl19
;