UNPKG

@hiherto-elements/natural

Version:

minimal es6 natural language detection

hiherto-elements.github.io/es-next-modules/natural/

hiherto-elements/es-next-modules

15 lines (11 loc) • 530 B

JavaScript

import Tokenizer from './tokenizer.js';

/**
 * Tokenizer that splits text into sentences based on terminal punctuation
 * (`.`, `?`, `!`) and surrounding quotation marks / brackets.
 */
export class SentenceTokenizer extends Tokenizer {
  /**
   * Break a string up into sentences.
   *
   * A chunk is recognised either as an opening quote/bracket, some text, a
   * terminator and a closing quote/bracket, or as plain text followed by at
   * least one terminator or whitespace character. Trailing text that is not
   * followed by a terminator or whitespace is therefore not matched.
   *
   * @param {string} text - The text to split.
   * @returns {string[]} Whitespace-trimmed sentence strings, passed through
   *   the inherited `trim` helper (defined in ./tokenizer.js, not visible
   *   here — presumably it strips empty edge entries; confirm there).
   *   Yields the result of trimming an empty list when nothing matches.
   */
  tokenize(text) {
    // Break the string up into sentences based on punctuation and quotation marks.
    // String.prototype.match with the `g` flag returns null (not an empty
    // array) when there is no match, so fall back to [] instead of crashing
    // on inputs such as "" or a single word without punctuation.
    const matches = text.match(/([\"\'\‘\“\'\"\[\(\{\⟨][^\.\?\!]+[\.\?\!][\"\'\’\”\'\"\]\)\}\⟩]|[^\.\?\!]+[\.\?\!\s]+)\s?/g) || [];

    // Remove unnecessary white space around each sentence.
    const sentences = matches.map((sentence) => sentence.trim());

    return this.trim(sentences);
  }
}

export default SentenceTokenizer;