UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

53 lines (49 loc) 2.04 kB
/**
 * The maximum number of prefixes that are stripped from a word while searching the dictionary.
 */
const MAX_PREFIXES_TO_REMOVE = 2;

/**
 * Removes a prefix from the beginning of the word.
 *
 * If the word starts with one of the prefixes "ב" "ה", "ו", "כ", "ל", "מ", "ש" it should be stemmed as the dictionary
 * does not contain forms with those prefixes.
 *
 * The first non-empty prefix in the list that the word starts with is removed in full, so the function
 * also behaves correctly for prefixes that are longer than one character. Empty prefixes are ignored.
 *
 * @param {string}   word     The word to check for a prefix.
 * @param {string[]} prefixes The prefixes that should be stemmed before checking the dictionary.
 *
 * @returns {string} Word without the prefix or the original word if no prefix was found.
 */
const removePrefix = function( word, prefixes ) {
	const matchedPrefix = prefixes.find( prefix => prefix.length > 0 && word.startsWith( prefix ) );

	return matchedPrefix === undefined ? word : word.slice( matchedPrefix.length );
};

/**
 * Looks up a word in the dictionary, considering only the dictionary's own entries.
 *
 * A plain property read would also find members inherited from Object.prototype (e.g. "constructor"
 * or "toString"), which are not stems and are not even strings.
 *
 * @param {Object} dictionary The dictionary that maps word forms to their base forms.
 * @param {string} word       The word to look up.
 *
 * @returns {string|undefined} The dictionary entry for the word, or undefined if the dictionary has no own entry for it.
 */
const findInDictionary = function( dictionary, word ) {
	return Object.prototype.hasOwnProperty.call( dictionary, word ) ? dictionary[ word ] : undefined;
};

/**
 * Stems Hebrew words (removes possible prefixes and returns lemma if found in the dictionary).
 *
 * The word is first looked up as is. If it is not found, up to two prefixes are removed one at a time,
 * and the dictionary is searched again after each removal.
 *
 * @param {string} word           The word to stem.
 * @param {Object} morphologyData The Hebrew morphology data; its `dictionary` and `prefixes` properties are used.
 *
 * @returns {string} The stemmed word or the original word if no stem was found.
 */
export default function stem( word, morphologyData ) {
	const dictionary = morphologyData.dictionary;
	let candidate = word;

	for ( let removedPrefixes = 0; removedPrefixes <= MAX_PREFIXES_TO_REMOVE; removedPrefixes++ ) {
		// If the current form exists in the dictionary, return the base form specified there.
		const stemmedWord = findInDictionary( dictionary, candidate );
		if ( stemmedWord ) {
			return stemmedWord;
		}

		// Do not strip more prefixes than allowed.
		if ( removedPrefixes === MAX_PREFIXES_TO_REMOVE ) {
			break;
		}

		// Try to remove one more prefix; stop when there is nothing left to remove.
		const withoutPrefix = removePrefix( candidate, morphologyData.prefixes );
		if ( withoutPrefix === candidate ) {
			break;
		}
		candidate = withoutPrefix;
	}

	// No form of the word was found in the dictionary: return the word unchanged.
	return word;
}