UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

155 lines (133 loc) 5.87 kB
import { isUndefined, flatten } from "lodash-es";
import { languageProcessing } from "yoastseo-dep";
const { buildFormRule, createRulesFromArrays } = languageProcessing;

import getAdjectiveStem from "./getAdjectiveStem";
import { getInfinitive, checkIrregulars as getIrregularVerbParadigm, endsWithIng } from "./getVerbStem.js";

/**
 * Gets the shortest of the alphabetically ordered strings from an array.
 *
 * The input is first passed through lodash `flatten`, so the caller's array is never mutated.
 * Among strings of equal (shortest) length, the one that sorts first according to
 * `String.prototype.localeCompare` wins.
 *
 * @param {string[]} array The array of strings.
 *
 * @returns {string|undefined} The shortest of the alphabetically ordered strings from the input array;
 *                             undefined if the input array is empty.
 */
export function findShortestAndAlphabeticallyFirst( array ) {
	const strings = flatten( array );
	// Seed the comparison with the last element; `pop()` yields undefined for an empty array.
	let result = strings.pop();

	strings.forEach( str => {
		const lengthDifference = str.length - result.length;
		if ( lengthDifference === 0 ) {
			// Same length: prefer the string that sorts first.
			if ( str.localeCompare( result ) < 0 ) {
				result = str;
			}
		} else if ( lengthDifference < 0 ) {
			// Strictly shorter strings always win.
			result = str;
		}
	} );

	return result;
}

/**
 * Checks if the input word occurs in the list of exceptions and if so returns the first form of the paradigm, which is
 * always the base.
 *
 * @param {string}  word       The word for which to determine its base.
 * @param {Array}   irregulars An array of irregular paradigms (each paradigm is a list of forms, base form first).
 *
 * @returns {string|null} The base form of the irregular word; null if no irregular stem was found.
 */
export function determineIrregularStem( word, irregulars ) {
	for ( let i = 0; i < irregulars.length; i++ ) {
		const paradigm = irregulars[ i ];
		for ( let j = 0; j < paradigm.length; j++ ) {
			if ( paradigm[ j ] === word ) {
				// The first entry of a paradigm is its base form.
				return paradigm[ 0 ];
			}
		}
	}
	return null;
}

/**
 * Checks if the input word occurs in the list of exception verbs and if so returns the first form
 * of the paradigm, which is always the base. Contrary to nouns and adjectives, irregular verbs can have different prefixes
 * which are not included in the list of exceptions and have to be processed separately.
 *
 * @param {string} word           The word for which to determine its base.
 * @param {Object} verbMorphology Regexes and irregulars for verb morphology. Must be an object: its `irregularVerbs`
 *                                and `regexVerb.verbPrefixes` properties are read unconditionally.
 *
 * @returns {string|null} The base form of the irregular word; null if no irregular stem was found.
 */
export function determineIrregularVerbStem( word, verbMorphology ) {
	const paradigmIfIrregularVerb = getIrregularVerbParadigm(
		word,
		verbMorphology.irregularVerbs,
		verbMorphology.regexVerb.verbPrefixes
	);

	if ( ! isUndefined( paradigmIfIrregularVerb ) ) {
		return paradigmIfIrregularVerb[ 0 ];
	}
	return null;
}

/**
 * Gets possible stems as a regular noun, adjective and verb.
 *
 * The checks are tried in order: plural noun (with ing-nouns further reduced to a verb base), then the
 * "ic"/"ical"/"ically" adjective rule, and finally the shortest of the verbal infinitive and the adjectival base.
 *
 * @param {string} word           The word for which to determine its base.
 * @param {Object} morphologyData The morphology data for the language; its `nouns`, `verbs` and `adjectives`
 *                                entries are all read.
 *
 * @returns {string} The shortest and the alphabetically-first of possible noun-like, verb-like and adjective-like bases.
 */
export function determineRegularStem( word, morphologyData ) {
	// Try to singularize as a noun.
	const regexVerb = morphologyData.verbs.regexVerb;
	const baseIfPluralNoun = buildFormRule( word, createRulesFromArrays( morphologyData.nouns.regexNoun.singularize ) );
	if ( ! isUndefined( baseIfPluralNoun ) ) {
		// Bring ing-nouns to base forms ("blessings" -> "bless").
		if ( endsWithIng( baseIfPluralNoun ) ) {
			// NOTE(review): the result of buildFormRule is returned unchecked here; presumably
			// `ingFormToInfinitive` always matches a word for which endsWithIng is true - confirm.
			return buildFormRule( baseIfPluralNoun, createRulesFromArrays( regexVerb.ingFormToInfinitive ) );
		}
		return baseIfPluralNoun;
	}

	// Check if the word ends with "ic", "ical" or "ically". Return the "ical" form for consistency.
	const regexAdjective = morphologyData.adjectives.regexAdjective;
	const baseIfIcally = buildFormRule( word, createRulesFromArrays( regexAdjective.icallyToBase ) );
	if ( ! isUndefined( baseIfIcally ) ) {
		return baseIfIcally;
	}

	// No more quick guesses, we have to determine a verbal infinitive and an adjectival base.
	const possibleRegularBases = [];

	// Verbal infinitive.
	const baseIfVerb = getInfinitive( word, regexVerb ).infinitive;
	possibleRegularBases.push( baseIfVerb );

	// Adjectival base.
	const stopAdjectives = morphologyData.adjectives.stopAdjectives;
	const baseIfAdjective = getAdjectiveStem( word, regexAdjective, stopAdjectives ).base;
	possibleRegularBases.push( baseIfAdjective );

	return findShortestAndAlphabeticallyFirst( possibleRegularBases );
}

/**
 * Returns the stem of the input word using the morphologyData (language-specific).
 *
 * Irregular paradigms are consulted first; only when the word is not irregular is it stemmed as a regular
 * noun, adjective or verb.
 *
 * @param {string} word           The word to get the stem for.
 * @param {Object} morphologyData The available morphology data per language. Must be an object: its `nouns`,
 *                                `adjectives` and `verbs` entries are read unconditionally.
 *
 * @returns {string} Stemmed (or base) form of the word.
 */
export function determineStem( word, morphologyData ) {
	const nounMorphology = morphologyData.nouns;

	const baseIfPossessive = buildFormRule( word, createRulesFromArrays( nounMorphology.regexNoun.possessiveToBase ) );

	let stem, irregular;

	// Determine if this is an irregular noun, adjective or verb.
	if ( isUndefined( baseIfPossessive ) ) {
		stem = word;
		// Word can be a noun, adjective or verb.
		irregular = determineIrregularStem( word, nounMorphology.irregularNouns ) ||
			determineIrregularStem( word, morphologyData.adjectives.irregularAdjectives ) ||
			determineIrregularVerbStem( word, morphologyData.verbs );
	} else {
		stem = baseIfPossessive;
		// The word is a possessive, it can only be a noun or an ing-noun.
		irregular = determineIrregularStem( baseIfPossessive, nounMorphology.irregularNouns );
	}

	if ( irregular ) {
		// Found the stem of an irregular word! Return it.
		return irregular;
	}

	// Treat the word as a regular noun, adjective or verb.
	return determineRegularStem( stem, morphologyData );
}

export default determineStem;