UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

88 lines (84 loc) 3.63 kB
import getWordsForHTMLParser from "../../../../src/languageProcessing/helpers/word/getWordsForHTMLParser";

/**
 * Table of test cases for `getWordsForHTMLParser`.
 *
 * Each entry holds:
 *  - description:    the name under which the case is reported by the test runner.
 *  - text:           the input string passed to `getWordsForHTMLParser`.
 *  - expectedResult: the exact array of tokens the call is expected to return.
 *
 * As the expectations below show, whitespace characters and punctuation marks are expected
 * to come back as tokens of their own, so the input order is preserved in the output.
 *
 * @type {Array<{description: string, text: string, expectedResult: string[]}>}
 */
const testCases = [
	{
		description: "returns an empty array for an empty string",
		text: "",
		expectedResult: [],
	},
	{
		description: "correctly tokenizes a phrase with no punctuation",
		text: "a simple phrase",
		expectedResult: [ "a", " ", "simple", " ", "phrase" ],
	},
	{
		description: "correctly tokenizes a simple sentence",
		text: "This is a simple sentence.",
		expectedResult: [ "This", " ", "is", " ", "a", " ", "simple", " ", "sentence", "." ],
	},
	{
		description: "correctly tokenizes a sentence with a comma",
		text: "This is a simple sentence, with a comma.",
		expectedResult: [ "This", " ", "is", " ", "a", " ", "simple", " ", "sentence", ",", " ", "with", " ", "a", " ", "comma", "." ],
	},
	{
		// A hyphenated phrase is expected to stay together as a single token.
		description: "correctly tokenizes a phrase with a hyphen",
		text: "a-hyphenated-phrase",
		expectedResult: [ "a-hyphenated-phrase" ],
	},
	{
		// A word-internal apostrophe is expected to stay part of the word.
		description: "correctly tokenizes a phrase with an apostrophe",
		text: "a phrase with an apostrophe's",
		expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "apostrophe's" ],
	},
	{
		description: "correctly tokenizes a phrase between quotes",
		text: "\"a phrase between quotes\"",
		expectedResult: [ "\"", "a", " ", "phrase", " ", "between", " ", "quotes", "\"" ],
	},
	{
		description: "correctly tokenizes a phrase between parentheses",
		text: "(a phrase between parentheses)",
		expectedResult: [ "(", "a", " ", "phrase", " ", "between", " ", "parentheses", ")" ],
	},
	{
		description: "correctly tokenizes a phrase that starts with a space",
		text: " a phrase that starts with a space",
		expectedResult: [ " ", "a", " ", "phrase", " ", "that", " ", "starts", " ", "with", " ", "a", " ", "space" ],
	},
	{
		description: "correctly tokenizes a phrase that ends with a space",
		text: "a phrase that ends with a space ",
		expectedResult: [ "a", " ", "phrase", " ", "that", " ", "ends", " ", "with", " ", "a", " ", "space", " " ],
	},
	{
		// The description must stay on a single line: a raw line break inside a quoted string is a syntax error.
		description: "correctly tokenizes a phrase that is separated by non-breaking spaces",
		text: "a\u00a0phrase\u00a0that\u00a0is\u00a0separated\u00a0by\u00a0non-breaking\u00a0spaces",
		expectedResult: [ "a", "\u00a0", "phrase", "\u00a0", "that", "\u00a0", "is", "\u00a0", "separated", "\u00a0", "by", "\u00a0",
			"non-breaking", "\u00a0", "spaces" ],
	},
	{
		description: "correctly tokenizes a phrase that is separated by tabs",
		text: "a\tphrase\tthat\tis\tseparated\tby\ttabs",
		expectedResult: [ "a", "\t", "phrase", "\t", "that", "\t", "is", "\t", "separated", "\t", "by", "\t", "tabs" ],
	},
	{
		// Within an attribute only the closing quote is expected as a separate token; the opening quote stays attached.
		description: "correctly tokenizes a shortcode",
		text: "[caption id=\"attachment_3341501\" align=\"alignnone\" width=\"300\"]",
		expectedResult: [ "[", "caption", " ", "id=\"attachment_3341501", "\"", " ", "align=\"alignnone", "\"", " ", "width=\"300", "\"", "]" ],
	},
	{
		description: "correctly tokenizes a shortcode with text",
		text: "[caption]test[/caption]",
		expectedResult: [ "[", "caption", "]", "test", "[", "/caption", "]" ],
	},
	{
		// The "#trade;" sequence is expected to remain attached to the adjacent word instead of being split off.
		description: "doesn't match with a hashed HTML entity (in this case, '#trade;' for '™') in the beginning or the end of the word",
		text: "one trademark#trade;, and another '#trade;trademark'",
		expectedResult: [ "one", " ", "trademark#trade;", ",", " ", "and", " ", "another", " ", "'", "#trade;trademark", "'" ],
	},
];

// Registers one test per table entry: the tokenizer output must deep-equal the expected token array.
describe.each( testCases )( "getWordsForHTMLParser", ( { description, text, expectedResult } ) => {
	it( description, () => {
		expect( getWordsForHTMLParser( text ) ).toEqual( expectedResult );
	} );
} );