UNPKG

string-punctuation-tokenizer

Version:

Small library that provides functions to tokenize a string into an array of words with or without punctuation

70 lines (65 loc) 1.82 kB
import {tokenize} from './tokenizers'; /** * Gets the occurrence of a subString in a string by using the subString index in the string. * @param {Array} tokens * @param {Number} currentWordIndex * @param {String} subString * @return {Object} */ export const occurrenceInTokens = ( tokens, currentWordIndex, subString, ) => { let occurrence = 0; for (let i = 0; i <= currentWordIndex; i++) { if (tokens[i].token === subString) occurrence ++; } return occurrence; }; /** * Function that count occurrences of a substring in a string * @param {Array} tokens - The string to search in * @param {String} subString - The sub string to search for * @return {Integer} - the count of the occurrences */ export const occurrencesInTokens = ( tokens, subString, ) => { let occurrences = 0; tokens.forEach((token) => { if (token && token.token === subString) occurrences ++; }); return occurrences; }; /** * Gets the occurrence of a subString in a string by using the subString index in the string. * @param {String} text * @param {Number} currentWordIndex * @param {String} subString * @return {Object} */ export const occurrenceInString = ( text, currentWordIndex, subString, ) => { const tokens = tokenize({text, verbose: true}); const occurrence = occurrenceInTokens(tokens, currentWordIndex, subString); return occurrence; }; /** * Function that count occurrences of a substring in a string * @param {String} text - The string to search in * @param {String} subString - The sub string to search for * @return {Integer} - the count of the occurrences */ export const occurrencesInString = ( text, subString, ) => { const tokens = tokenize({text, verbose: true}); const occurrences = occurrencesInTokens(tokens, subString); return occurrences; };