UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

128 lines (122 loc) 6.73 kB
import getClauses from "../../../../../src/languageProcessing/helpers/passiveVoice/periphrastic/getClauses.js";
import arrayToRegex from "../../../../../src/languageProcessing/helpers/regex/createRegexFromArray";
import Clause from "../../../../../src/languageProcessing/values/Clause";
import EnglishClause from "../../../../../src/languageProcessing/languages/en/values/Clause";

// Word lists used by the fixtures below. They are always copied (spread) into a fixture,
// so every fixture owns its own array instances, just like separate array literals would.
const AUXILIARIES = [ "am", "is", "are", "est", "es", "sont" ];
const STOPWORDS = [ "to", "which", "who", "whom", "that" ];
const ING_EXCLUSIONS = [ "king", "cling", "ring", "being", "thing", "something", "anything" ];
const FOLLOWING_AUXILIARY_EXCEPTIONS = [ "le", "la", "les", "el" ];
const DIRECT_PRECEDENCE_EXCEPTIONS = [ "se", "me", "te", "s'y" ];
const ELISION_AUXILIARY_EXCEPTIONS = [ "c'", "s'", "peut-" ];

/**
 * Creates the stop character regex.
 * The literal carries the global flag, so a new instance is returned on every call
 * to make sure no two fixtures share `lastIndex` state.
 *
 * @returns {RegExp} A fresh stop character regex.
 */
const createStopCharacterRegex = () => /([:,]|('ll)|('ve))(?=[ \n\r\t'"+\-»«‹›<>])/ig;

/**
 * Creates the regex that matches words ending in -ing.
 * A new instance is returned on every call for the same reason as the stop character regex.
 *
 * @returns {RegExp} A fresh regex for words ending in -ing.
 */
const createVerbEndingInIngRegex = () => /\w+ing(?=$|[ \n\r\t.,'()"+\-;!?:/»«‹›<>])/ig;

/**
 * Creates a fixture that uses the generic Clause class and defines every word list and regex
 * that appears in this spec.
 *
 * @param {Array} otherStopWordIndices The value for the `otherStopWordIndices` option.
 *
 * @returns {Object} The options object to pass to `getClauses`.
 */
const createFullOptions = ( otherStopWordIndices ) => ( {
	Clause: Clause,
	stopwords: [ ...STOPWORDS ],
	auxiliaries: [ ...AUXILIARIES ],
	ingExclusions: [ ...ING_EXCLUSIONS ],
	regexes: {
		auxiliaryRegex: arrayToRegex( [ ...AUXILIARIES ] ),
		stopCharacterRegex: createStopCharacterRegex(),
		verbEndingInIngRegex: createVerbEndingInIngRegex(),
		followingAuxiliaryExceptionRegex: arrayToRegex( [ ...FOLLOWING_AUXILIARY_EXCEPTIONS ] ),
		directPrecedenceExceptionRegex: arrayToRegex( [ ...DIRECT_PRECEDENCE_EXCEPTIONS ] ),
		elisionAuxiliaryExceptionRegex: arrayToRegex( [ ...ELISION_AUXILIARY_EXCEPTIONS ], true ),
	},
	otherStopWordIndices,
} );

// Full fixture with an empty `otherStopWordIndices` list.
const options1 = createFullOptions( [] );

// Uses the English Clause class; only the auxiliary and stop character regexes are defined.
const options2 = {
	Clause: EnglishClause,
	auxiliaries: [ ...AUXILIARIES ],
	stopwords: [ ...STOPWORDS ],
	regexes: {
		auxiliaryRegex: arrayToRegex( [ ...AUXILIARIES ] ),
		stopCharacterRegex: createStopCharacterRegex(),
	},
	otherStopWordIndices: [],
};

// Generic Clause class with a followingAuxiliaryExceptionRegex but without a
// directPrecedenceExceptionRegex and without `otherStopWordIndices`.
const options3 = {
	Clause: Clause,
	auxiliaries: [ ...AUXILIARIES ],
	stopwords: [ ...STOPWORDS ],
	regexes: {
		auxiliaryRegex: arrayToRegex( [ ...AUXILIARIES ] ),
		stopCharacterRegex: createStopCharacterRegex(),
		followingAuxiliaryExceptionRegex: arrayToRegex( [ ...FOLLOWING_AUXILIARY_EXCEPTIONS ] ),
	},
};

// Full fixture with one additional stop word index (an "ing" match at index 23).
const options4 = createFullOptions( [ { index: 23, match: "ing" } ] );

/**
 * Runs `getClauses` and returns the first clause of the result.
 *
 * @param {string} sentence The sentence to split into clauses.
 * @param {Object} options  The options to pass to `getClauses`.
 *
 * @returns {*} The first element of the array returned by `getClauses`.
 */
const firstClause = ( sentence, options ) => getClauses( sentence, options )[ 0 ];

/**
 * Runs `getClauses` and returns how many clauses were found.
 *
 * @param {string} sentence The sentence to split into clauses.
 * @param {Object} options  The options to pass to `getClauses`.
 *
 * @returns {number} The length of the array returned by `getClauses`.
 */
const countClauses = ( sentence, options ) => getClauses( sentence, options ).length;

describe( "splits sentences into clauses", () => {
	it( "filters out clauses without auxiliary", () => {
		const sentence = "The English are always throwing parties.";

		expect( firstClause( sentence, options4 ).getClauseText() ).toBe( "are always" );
		expect( firstClause( sentence, options4 ).isPassive() ).toBe( false );
		expect( countClauses( sentence, options4 ) ).toBe( 1 );
	} );

	it( "returns empty array if no auxiliary present in the sentence", () => {
		const sentence = "A comely lord.";

		expect( getClauses( sentence, options1 ) ).toEqual( [] );
	} );

	it( "doesn't return clauses when an auxiliary is preceded by a reflexive pronoun", () => {
		const sentence = "Ils se sont lavés.";

		expect( countClauses( sentence, options1 ) ).toBe( 0 );
	} );

	it( "doesn't return clauses when an auxiliary is preceded by an elided reflexive pronoun", () => {
		const sentence = "L’emballement s'est prolongé mardi 9 janvier.";

		expect( countClauses( sentence, options1 ) ).toBe( 0 );
	} );

	it( "doesn't split on sentence breakers within words", () => {
		// The word "praise" contains the sentence breaker "is".
		const sentence = "Commented is praise due.";

		expect( firstClause( sentence, options1 ).getClauseText() ).toBe( "is praise due." );
		expect( countClauses( sentence, options1 ) ).toBe( 1 );
	} );

	it( "splits sentences on stop characters", () => {
		const sentence = "It is a hands-free, voice-controlled device.";

		expect( firstClause( sentence, options1 ).getClauseText() ).toBe( "is a hands-free" );
		expect( countClauses( sentence, options1 ) ).toBe( 1 );
	} );

	it( "doesn't split sentences on stop characters that are not preceded by a word and also not followed by a space/punctuation mark", () => {
		const sentence = "It is a 1,000,000 dollar house.";

		expect( firstClause( sentence, options1 ).getClauseText() ).toBe( "is a 1,000,000 dollar house." );
		expect( countClauses( sentence, options1 ) ).toBe( 1 );
	} );

	it( "splits sentences on stop characters when followed by a punctuation mark", () => {
		const sentence = "\"This is it\", he said.";

		expect( firstClause( sentence, options1 ).getClauseText() ).toBe( "is it\"" );
		expect( countClauses( sentence, options1 ) ).toBe( 1 );
	} );

	it( "doesn't return clauses when an auxiliary is followed by a word from the followingAuxiliaryExceptionWords list", () => {
		// The auxiliary is followed by the exception word "el".
		const spanishSentence = "Es el capítulo preferido de varios miembros del equipo de producción.";
		expect( countClauses( spanishSentence, options1 ) ).toBe( 0 );

		// The auxiliary is followed by the exception word "le".
		const frenchSentence = "C'est le film le plus vu.";
		expect( countClauses( frenchSentence, options1 ) ).toBe( 0 );
	} );

	it( "returns clauses when there is no directPrecedenceException and followingAuxiliaryExceptionWords lists available", () => {
		const sentence = "The cat is vaccinated.";

		expect( firstClause( sentence, options2 ).getClauseText() ).toBe( "is vaccinated." );
		expect( countClauses( sentence, options2 ) ).toBe( 1 );
	} );

	it( "returns clause(s) and correctly identify its passiveness when using language specific Clause class", () => {
		const passiveSentence = "The cat is vaccinated.";
		expect( firstClause( passiveSentence, options2 ).getClauseText() ).toBe( "is vaccinated." );
		expect( firstClause( passiveSentence, options2 ).isPassive() ).toBe( true );

		const activeSentence = "The tortie cat is pretty.";
		expect( firstClause( activeSentence, options2 ).getClauseText() ).toBe( "is pretty." );
		expect( firstClause( activeSentence, options2 ).isPassive() ).toBe( false );
	} );

	it( "doesn't return clauses when an auxiliary is followed by a word from the followingAuxiliaryExceptionWords list " +
		"and when the directPrecedenceException list is not available.", () => {
		// The auxiliary is followed by the exception word "le".
		const sentence = "C'est le film le plus vu.";

		expect( countClauses( sentence, options3 ) ).toBe( 0 );
	} );
} );