yoastseo-dep
Version:
Yoast clientside page analysis
130 lines (112 loc) • 4.77 kB
JavaScript
import getSentences from "../helpers/sentence/getSentences.js";
import { stripFullTags as stripHTMLTags } from "../helpers/sanitize/stripHTMLTags.js";
import Sentence from "../../languageProcessing/values/Sentence.js";
import { forEach } from "lodash-es";
import removeHtmlBlocks from "../helpers/html/htmlParser";
import { filterShortcodesFromHTML } from "../helpers";
/**
* Looks for morphological passive voice.
* Supported morphological languages: "ru", "sv", "id", "ar", "he", "tr", "fa".
* Farsi is implemented as morphological because the periphrastic passives are used as compound verbs (among other compound passives).
* @param {Paper} paper The paper object.
* @param {Researcher} researcher The researcher.
*
* @returns {Object} The found passive sentences.
*/
export const getMorphologicalPassives = function( paper, researcher ) {
const isPassiveSentence = researcher.getHelper( "isPassiveSentence" );
let text = paper.getText();
text = removeHtmlBlocks( text );
text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
const sentences = getSentences( text, memoizedTokenizer )
.map( function( sentence ) {
return new Sentence( sentence );
} );
const totalNumberSentences = sentences.length;
const passiveSentences = [];
forEach( sentences, function( sentence ) {
const strippedSentence = stripHTMLTags( sentence.getSentenceText() ).toLocaleLowerCase();
sentence.setPassive( isPassiveSentence( strippedSentence ) );
if ( sentence.isPassive() === true ) {
passiveSentences.push( sentence.getSentenceText() );
}
} );
return {
total: totalNumberSentences,
passives: passiveSentences,
};
};
/**
* Looks for periphrastic passive voice.
* Supported periphrastic languages: "en", "de", "nl", "fr", "es", "it", "pt", "pl", "sk".
*
* @param {Paper} paper The paper object.
* @param {Researcher} researcher The researcher.
*
* @returns {Object} The found passive sentences.
*/
export const getPeriphrasticPassives = function( paper, researcher ) {
const getClauses = researcher.getHelper( "getClauses" );
let text = paper.getText();
text = removeHtmlBlocks( text );
text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
const sentences = getSentences( text, memoizedTokenizer )
.map( function( sentence ) {
return new Sentence( sentence );
} );
const totalNumberSentences = sentences.length;
const passiveSentences = [];
forEach( sentences, function( sentence ) {
const strippedSentence = stripHTMLTags( sentence.getSentenceText() ).toLocaleLowerCase();
// The functionality based on sentencePart objects should be rewritten using array indices of stopwords and auxiliaries.
// Divide a sentence into clauses and return an array of clause objects that have been checked for passiveness.
const clauses = getClauses( strippedSentence );
sentence.setClauses( clauses );
// Check sentence passiveness based on its clause passiveness.
if ( sentence.isPassive() ) {
passiveSentences.push( sentence.getSentenceText() );
}
} );
return {
total: totalNumberSentences,
passives: passiveSentences,
};
};
/**
* Looks for both morphological and periphrastic passive voice
* Supported languages with both morphological and periphrastic passives: "hu", "nb".
* Due to technical difficulties "nb" is only implemented as periphrastic at the moment. Languages that have not been implemented yet: "da".
*
* @param {Paper} paper The paper object.
* @param {Researcher} researcher The researcher.
*
* @returns {Object} The found passive sentences.
*/
const getMorphologicalAndPeriphrasticPassive = function( paper, researcher ) {
const morphologicalPassives = getMorphologicalPassives( paper, researcher );
const periphrasticPassives = getPeriphrasticPassives( paper, researcher ).passives;
return {
total: morphologicalPassives.total,
passives: periphrasticPassives.concat( morphologicalPassives.passives ),
};
};
/**
* Looks for passive voice.
*
* @param {Paper} paper The paper object.
* @param {Researcher} researcher The researcher.
*
* @returns {Object} The found passive sentences.
*/
export default function getPassiveVoice( paper, researcher ) {
const passiveType = researcher.getConfig( "passiveConstructionType" );
if ( passiveType === "periphrastic" ) {
return getPeriphrasticPassives( paper, researcher );
}
if ( passiveType === "morphological" ) {
return getMorphologicalPassives( paper, researcher );
}
return getMorphologicalAndPeriphrasticPassive( paper, researcher );
}