UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

138 lines (119 loc) 3.9 kB
import getWords from "../../../../src/languageProcessing/helpers/word/getWords"; describe( "a test for getting words from a sentence", function() { it( "returns an empty array", function() { expect( getWords( "" ).length ).toBe( 0 ); } ); it( "returns an array without html", function() { const words = getWords( "<strong>strong</strong> and <em>emphasized</em>" ); expect( words[ 0 ] ).toBe( "strong" ); expect( words[ 1 ] ).toBe( "and" ); expect( words[ 2 ] ).toBe( "emphasized" ); } ); it( "returns an array without the space comma", function() { const words = getWords( "strong , emphasized" ); expect( words[ 0 ] ).toBe( "strong" ); expect( words[ 1 ] ).toBe( "emphasized" ); } ); it( "returns the correct array of words from a text containing a lot of punctuation", function() { const words = getWords( "A sentence—with words. And some; punctuation." ); expect( words ).toEqual( [ "A", "sentence", "with", "words", "And", "some", "punctuation", ] ); } ); it( "does not do anything with repetitions", function() { const words = getWords( "A sentence sentence, sentence! Sentence with words." ); expect( words ).toEqual( [ "A", "sentence", "sentence", "sentence", "Sentence", "with", "words", ] ); } ); it( "doesn't remove punctuation when doRemovePunctuation is false.", () => { const text = "A sentence with words. And some; punctuation."; const words = getWords( text, false ); expect( words ).toEqual( [ "A", "sentence", "with", "words", ".", "And", "some", ";", "punctuation", ".", ] ); } ); it( "doesn't return non-breaking space &nbsp; in the result", () => { const text = "<p>Sri Tandjung noted that Javanese had been eating cooked (native black) soybeans since the 12th&nbsp;century.</p>\n"; expect( getWords( text ) ).toEqual( [ "Sri", "Tandjung", "noted", "that", "Javanese", "had", "been", "eating", "cooked", "native", "black", "soybeans", "since", "the", "12th", "century" ] ); } ); it( "gets words from text containing html tags", function() { const text = "<p>A very intelligent cat loves their human. A dog is very cute.</p><h3>A subheading 3" + "</h3>text text text<h4>A subheading 4</h4>more text."; expect( getWords( text ).length ).toBe( 23 ); expect( getWords( text ) ).toEqual( [ "A", "very", "intelligent", "cat", "loves", "their", "human", "A", "dog", "is", "very", "cute", "A", "subheading", "3", "text", "text", "text", "A", "subheading", "4", "more", "text" ] ); } ); } ); describe( "language-specific tests for getting words", function() { it( "returns words without special Arabic punctuation marks: ،؟؛", function() { const words = getWords( "ما هي المقالات الجيدة؟ السلطان خوارزمشاه، وعدم تنسيق سبل المقاومة،" + " كانت كلها أسبابًا لفشل ذلك الصمود. جدول قواسم الأعداد من 1 إلى العدد 1000؛ وقاسم" ); expect( words ).toEqual( [ "ما", "هي", "المقالات", "الجيدة", "السلطان", "خوارزمشاه", "وعدم", "تنسيق", "سبل", "المقاومة", "كانت", "كلها", "أسبابًا", "لفشل", "ذلك", "الصمود", "جدول", "قواسم", "الأعداد", "من", "1", "إلى", "العدد", "1000", "وقاسم", ] ); } ); it( "returns words without special Urdu punctuation marks: ۔", function() { const words = getWords( "اس دوران میں وہ حملے کرتے رہے اور آخرکار شکست کھا گئے۔" ); expect( words ).toEqual( [ "اس", "دوران", "میں", "وہ", "حملے", "کرتے", "رہے", "اور", "آخرکار", "شکست", "کھا", "گئے", ] ); } ); } );