sbd-ts

Version:

Split text into sentences with Sentence Boundary Detection (SBD).

59 lines (44 loc) • 1.58 kB

text/typescript

/*jshint node:true, laxcomma:true */
/*global describe:true, it:true */
"use strict";

import { assert } from "chai";
import tokenizer from '../lib/tokenizer';

/**
 * Degenerate-input tests for `tokenizer.sentences`.
 *
 * Every case invokes the tokenizer inside its own `it` callback and keeps the
 * result in a block-scoped `const`. Mocha executes `describe` bodies in full
 * before running any `it` callback, so a shared, reassigned variable in the
 * `describe` scope would make every test in that suite observe only the last
 * assigned value. Keeping the call local to the test also means an exception
 * thrown by the tokenizer fails that single test instead of aborting suite
 * collection.
 */
describe('Empty', function () {
    describe('string', function () {
        it('should not get a sentence', function () {
            const sentences = tokenizer.sentences("");

            assert.equal(sentences.length, 0);
        });

        it('should not get a sentence from whitespace', function () {
            const sentences = tokenizer.sentences(" \n\n ");

            assert.equal(sentences.length, 0);
        });
    });

    describe('undefined', function () {
        it('should not get a sentence', function () {
            // Called with no argument on purpose: the input is `undefined`.
            const sentences = tokenizer.sentences();

            assert.equal(sentences.length, 0);
        });
    });

    describe('non string', function () {
        it('should not get a sentence from array', function () {
            const entry: any[] = [];
            const sentences = tokenizer.sentences(entry);

            assert.equal(sentences.length, 0);
        });

        it('should not get a sentence from object', function () {
            // The cast only changes the static type; at runtime this is a
            // plain object, which is the case under test.
            const entry = {} as any[];
            const sentences = tokenizer.sentences(entry);

            assert.equal(sentences.length, 0);
        });
    });

    describe('symbols only', function () {
        it('should get a single entry', function () {
            const sentences = tokenizer.sentences("^&%(*&");

            assert.equal(sentences.length, 1);
        });
    });
});