// UNPKG
//
// document-highlighter
//
// Version:
// 371 lines (355 loc) 16.1 kB
'use strict';

// Mocha test suite for the document highlighter in "standard mode".
// Most cases are table-driven: each table maps a test description to
// { text, query, expected[, options] } and is registered through generateIts().

require('should');
var documentHighlight = require('../lib');


/**
 * Register one test asserting the `text` property returned by
 * documentHighlight.text().
 *
 * @param {string} description - Test title passed to it().
 * @param {string} text - Raw text to highlight.
 * @param {string} query - Search query.
 * @param {Object} options - Options forwarded to documentHighlight.text().
 * @param {string} expected - Expected highlighted text.
 */
var generateTextIt = function(description, text, query, options, expected) {
  it(description, function() {
    var ret = documentHighlight.text(text, query, options);
    ret.text.should.eql(expected);
  });
};


/**
 * Register one test asserting the `html` property returned by
 * documentHighlight.html().
 *
 * @param {string} description - Test title passed to it().
 * @param {string} text - HTML markup to highlight.
 * @param {string} query - Search query.
 * @param {Object} options - Options forwarded to documentHighlight.html().
 * @param {string} expected - Expected highlighted markup.
 */
var generateHtmlIt = function(description, text, query, options, expected) {
  it(description, function() {
    var ret = documentHighlight.html(text, query, options);
    ret.html.should.eql(expected);
  });
};


/**
 * Register every case of a test table with the given generator.
 *
 * @param {Object} its - Map of test description to
 *   { text, query, expected[, options] }.
 * @param {Function} func - generateTextIt or generateHtmlIt; called as
 *   func(description, text, query, options, expected).
 */
var generateIts = function(its, func) {
  // Cases that do not provide their own options are wrapped with '*' markers.
  var defaultOptions = {
    before: '*',
    after: '*'
  };

  // Object.keys() restricts iteration to the table's own properties.
  Object.keys(its).forEach(function(itShould) {
    var itDatas = its[itShould];
    func(itShould, itDatas.text, itDatas.query, itDatas.options || defaultOptions, itDatas.expected);
  });
};


describe('Standard mode', function() {
  describe('with text content', function() {
    var its = {
      'should not modify non-matching text': {
        text: 'Hello and welcome to the real world, Neo',
        query: 'non matching query',
        expected: 'Hello and welcome to the real world, Neo'
      },
      'should do nothing with the empty query': {
        text: 'Hello and welcome to the real world, Neo',
        query: '',
        expected: 'Hello and welcome to the real world, Neo'
      },
      'should highlight relevant text': {
        text: 'Hello and welcome to the real world, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to the real world*, Neo'
      },
      'should highlight all relevant text': {
        text: 'Hello and welcome to the real world, Neo. This world is mine, not your old world.',
        query: 'world',
        expected: 'Hello and welcome to the real *world*, Neo. This *world* is mine, not your old *world*.'
      },
      'should be case insensitive to the text': {
        text: 'Hello and WELCOME to the real world, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *WELCOME to the real world*, Neo'
      },
      'should be case insensitive to the query': {
        text: 'Hello and welcome to the real world, Neo',
        query: 'WELCOME to the real world',
        expected: 'Hello and *welcome to the real world*, Neo'
      },
      'should use unicode mapping for the text': {
        text: 'Vous souhaitez régler votre loyer, constituer votre épargne ou effectuer un virement régulier ?',
        query: 'regler votre loyer',
        expected: 'Vous souhaitez *régler votre loyer*, constituer votre épargne ou effectuer un virement régulier ?',
        options: {
          before: '*',
          after: '*',
          language: 'fr'
        }
      },
      'should use insensitive unicode mapping for the text': {
        text: 'Vous souhaitez RÉGLER votre loyer, constituer votre épargne ou effectuer un virement régulier ?',
        query: 'régler votre loyer',
        expected: 'Vous souhaitez *RÉGLER votre loyer*, constituer votre épargne ou effectuer un virement régulier ?',
        options: {
          before: '*',
          after: '*',
          language: 'fr'
        }
      },
      'should use unicode mapping for the query': {
        text: 'Vous souhaitez regler votre loyer, constituer votre épargne ou effectuer un virement régulier ?',
        query: 'régler votre loyer',
        expected: 'Vous souhaitez *regler votre loyer*, constituer votre épargne ou effectuer un virement régulier ?',
        options: {
          before: '*',
          after: '*',
          language: 'fr'
        }
      },
      'should match suffixes in the text': {
        text: 'Hello and welcome to the reals worlds, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to the reals worlds*, Neo'
      },
      'should match suffixes in the query': {
        text: 'Hello and welcome to the real world, Neo',
        query: 'welcome to the reals worlds',
        expected: 'Hello and *welcome to the real world*, Neo'
      },
      'should split non contiguous queries': {
        text: 'In JavaScript, you can define a callback handler in regex string replace operations',
        query: 'Javascript callback operations',
        expected: 'In *JavaScript*, you can define a *callback* handler in regex string replace *operations*'
      },
      'should split non contiguous queries and highlight longest match': {
        text: 'In JavaScript, you can define a callback handler in regex string replace operations',
        query: 'callback handler in operations',
        expected: 'In JavaScript, you can define a *callback handler in* regex string replace *operations*'
      },
      'should not highlight stop words': {
        text: 'Hello to the real world, Neo',
        query: 'Welcome to the probably real world',
        expected: 'Hello to the *real world*, Neo'
      },
      'should include stop-words queries': {
        text: 'Hello and farewell to the real world, Neo',
        query: 'farewell real world',
        expected: 'Hello and *farewell to the real world*, Neo'
      },
      // NOTE(review): despite the title, the text below contains only single
      // spaces. Runs of whitespace may have been collapsed when this listing
      // was captured -- confirm against the published package.
      'should allow for multiple consecutive whitespace characters': {
        text: 'Unicorns eat, drink and are merry',
        query: 'eat drink',
        expected: 'Unicorns *eat, drink* and are merry'
      },
      'should allow for punctuations': {
        text: 'Eat, drink and be merry',
        query: 'eat drink',
        expected: '*Eat, drink* and be merry'
      },
      'should allow for numbers': {
        text: 'Similarity algorithm bm25 is awesome.',
        query: 'bm25',
        expected: 'Similarity algorithm *bm25* is awesome.'
      },
      'should allow for symbols': {
        text: 'FILE #BA/15/42',
        query: 'file ba 15 42',
        expected: '*FILE* #*BA*/*15*/*42*'
      },
      'should highlight multiple paragraphs': {
        text: 'Hello and welcome to the real world, Neo.\nTrinity will be there soon.',
        query: 'Neo Trinity',
        expected: 'Hello and welcome to the real world, *Neo*.\n*Trinity* will be there soon.'
      },
      'should work on longer texts': {
        text: "The index analysis module acts as a configurable registry of Analyzers that can be used in order to both break indexed (analyzed) fields when a document is indexed and process query strings. It maps to the Lucene Analyzer.",
        query: "The index analysis string",
        expected: "*The index analysis* module acts as a configurable registry of Analyzers that can be used in order to both break indexed (analyzed) fields when a document is indexed and process query *strings*. It maps to the Lucene Analyzer."
      }
    };

    generateIts(its, generateTextIt);

    // No options are passed in the next two tests; the expectations use
    // <strong>...</strong> as the highlight markers.
    it('should allow for regexp chars in query', function() {
      documentHighlight.text("my ^text$", "^text$").text.should.eql("my ^<strong>text</strong>$");
    });

    it('should return highlighted text and indices', function() {
      var ret = documentHighlight.text("Farewell and welcome to the real world.", "farewell world");

      var expected = {
        text: '<strong>Farewell</strong> and welcome to the real <strong>world</strong>.',
        indices: [
          {
            startIndex: 0,
            endIndex: 8,
            content: 'Farewell'
          },
          {
            startIndex: 33,
            endIndex: 38,
            content: 'world'
          }
        ]
      };

      ret.should.eql(expected);
    });
  });

  describe('with HTML content', function() {
    var its = {
      'should not modify non-matching text': {
        text: 'Hello and <span>welcome to the</span> real world, Neo',
        query: 'non matching query',
        expected: 'Hello and <span>welcome to the</span> real world, Neo'
      },
      'should highlight and maintain HTML': {
        text: '<strong>Hello</strong> and welcome to the real world, Neo',
        query: 'welcome to the real world',
        expected: '<strong>Hello</strong> and *welcome to the real world*, Neo'
      },
      'should highlight and maintain HTML inside query': {
        text: 'Hello and welcome to the <strong>real</strong> world, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to the <strong>real</strong> world*, Neo'
      },
      'should highlight and maintain HTML inside query in edge case': {
        text: 'Hello and welcome to the <strong>real world</strong>, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to the <strong>real world</strong>*, Neo'
      },
      'should match multiples fragments': {
        text: 'In JavaScript, <em>you can define a callback handler in regex</em> string replace operations',
        query: 'callback handler operations',
        expected: 'In JavaScript, <em>you can define a *callback handler* in regex</em> string replace *operations*'
      },
      // Disabled case, kept as found:
      // 'should match multiples fragments in blocks': {
      //   text: '<div>alex</div><div><br></div>trinity',
      //   query: 'alex trinity',
      //   expected: '<div>*alex*</div><div><br></div>*trinity*',
      // },
      'should skip empty HTML': {
        text: 'Hello and welcome to<span class="a_0__0"></span> the real world, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to<span class="a_0__0"></span> the real world*, Neo'
      },
      'should skip embedded empty HTML': {
        text: 'Hello and wel<span class="a_0__0"></span>come to the real world, Neo',
        query: 'welcome to the real world',
        expected: 'Hello and *wel<span class="a_0__0"></span>come to the real world*, Neo'
      },
      'should work with dirty HTML': {
        text: 'Hello and wel<>come <!-- -->to the real world, Neo',
        query: 'Neo',
        expected: 'Hello and wel<>come <!-- -->to the real world, *Neo*'
      },
      'should return well-formed HTML': {
        text: 'Hello and welcome to <strong>the real world, Neo</strong>',
        query: 'welcome to the real world',
        expected: 'Hello and *welcome to *<strong>*the real world*, Neo</strong>'
      },
      'should highlight multiple paragraphs': {
        text: '<p>Hello and welcome to the real world, Neo.</p><p>Trinity will be there soon.</p>',
        query: 'Neo Trinity',
        expected: '<p>Hello and welcome to the real world, *Neo.*</p><p>*Trinity* will be there soon.</p>'
      },
      'should handle block elements': {
        text: '<p>Hello</p><p>Trinity</p>',
        query: 'Trinity',
        expected: '<p>Hello</p><p>*Trinity*</p>'
      },
      'should handle block elements with punctuation': {
        text: '<p>Hello and welcome to the real world, Neo.</p><p>Trinity will be there soon.</p>',
        query: 'Neo Trinity',
        expected: '<p>Hello and welcome to the real world, *Neo.*</p><p>*Trinity* will be there soon.</p>'
      },
      'should use secondary highlight': {
        text: '<strong>Hello and welcome to the real world</strong> Neo.',
        query: 'world Neo',
        options: {
          before: '<span>',
          beforeSecond: '<span class=secondary>',
          after: '</span>'
        },
        expected: '<strong>Hello and welcome to the real <span>world</span></strong><span class=secondary> Neo</span>.'
      },
      'should use before second and after second highlight': {
        text: '<i>Hello and welcome to the real world</i> Neo.',
        query: 'world Neo',
        expected: '<i>Hello and welcome to the real <strong>world</strong></i><span class=secondary> Neo</span>.',
        options: {
          before: '<strong>',
          after: '</strong>',
          beforeSecond: '<span class=secondary>',
          afterSecond: '</span>'
        }
      },
      'should skip markup with non-textual content': {
        text: '<style>abbr { font-size:2em; }</style> <p>This font</p>',
        query: 'font',
        expected: '<style>abbr { font-size:2em; }</style> <p>This *font*</p>'
      },
      'should not allow block markup to get caught in the middle of a match': {
        text: 'Hello and welcome to the real world <div>Neo</div>and Trinity.',
        query: 'world Neo Trinity',
        expected: 'Hello and welcome to the real *world *<div>*Neo*</div>*and Trinity*.'
      },
      'should allow for self closing noClosing elements': {
        text: '<html><head title="foo" /><body>Hello and welcome to the real world Neo and Trinity.</body></html>',
        query: 'Neo Trinity',
        expected: '<html><head title="foo" /><body>Hello and welcome to the real world *Neo and Trinity*.</body></html>'
      },
      'should allow for empty block elements': {
        text: 'I just sent you a meeting invitation.<div><br></div><div>Mark</div></div>',
        query: 'Mark',
        expected: 'I just sent you a meeting invitation.<div><br></div><div>*Mark*</div></div>'
      }
    };

    generateIts(its, generateHtmlIt);

    it('should fail on invalid markup', function() {
      try {
        documentHighlight.html("<hello world", "world");
      }
      catch(e) {
        // Any exception thrown by html() counts as the expected rejection.
        return;
      }

      // Reached only when html() accepted the unterminated tag.
      throw new Error("Invalid markup should not be parsed");
    });

    it('should add a complimentary space character if needed', function() {
      var html = '<p><span class="greeting">Hello</span> and welcome<br/>to the real world, Neo</p>';
      var query = 'welcome to the real world';
      var expected = 'Hello and *welcome to the real world*, Neo';

      var ret = documentHighlight.html(html, query, { before: '*', after: '*' });

      // Checks the `text` property of the html() result, not `html`: the
      // expectation has a space where the markup has <br/>.
      ret.text.should.eql(expected);
    });

    describe('in edge cases with existing markup', function() {
      // [---] is the highlight query,
      // (---) the existing markup
      var its = {
        '---(--[--------]--)----': {
          text: '<strong>Eat drink and be merry</strong> for tomorrow we die',
          query: 'drink',
          expected: '<strong>Eat *drink* and be merry</strong> for tomorrow we die'
        },
        '------[-(----)-]-------': {
          text: 'Eat <strong>drink</strong> and be merry for tomorrow we die',
          query: 'Eat drink and be merry',
          expected: '*Eat <strong>drink</strong> and be merry* for tomorrow we die'
        },
        '------[(------)]-------': {
          text: 'Eat <strong>drink</strong> and be merry for tomorrow we die',
          query: 'drink',
          expected: 'Eat *<strong>drink</strong>* and be merry for tomorrow we die'
        },
        '--(---[---)----]-------': {
          text: '<strong>Eat drink and be merry</strong> for tomorrow we die',
          query: 'merry for tomorrow',
          expected: '<strong>Eat drink and be *merry*</strong>* for tomorrow* we die'
        },
        '------[----(---]---)---': {
          text: 'Eat <strong>drink and be merry</strong> for tomorrow we die',
          query: 'Eat drink',
          expected: '*Eat *<strong>*drink* and be merry</strong> for tomorrow we die'
        },
        '------[(---)---]-------': {
          text: '<strong>Eat drink</strong> and be merry for tomorrow we die',
          query: 'Eat drink and be merry',
          expected: '*<strong>Eat drink</strong> and be merry* for tomorrow we die'
        },
        '------[---(---)]-------': {
          text: 'Eat drink <strong>and be merry</strong> for tomorrow we die',
          query: 'Eat drink and be merry',
          expected: '*Eat drink <strong>and be merry</strong>* for tomorrow we die'
        },
        '--(--)[--------]-------': {
          text: '<strong>Eat</strong> drink and be merry for tomorrow we die',
          query: 'drink and be merry',
          expected: '<strong>Eat</strong> *drink and be merry* for tomorrow we die'
        },
        '------[--------](-----)': {
          text: 'Eat drink <strong>and be merry</strong> for tomorrow we die',
          query: 'for tomorrow we die',
          expected: 'Eat drink <strong>and be merry</strong> *for tomorrow we die*'
        }
      };

      generateIts(its, generateHtmlIt);
    });
  });
});