@thi.ng/text-analysis
Version: 
Text tokenization, transformation & analysis transducers, utilities, stop words, Porter stemming, vector encodings, similarities
11 lines (10 loc) • 372 B
JavaScript
import { split } from "@thi.ng/strings";
import { comp } from "@thi.ng/transducers/comp";
import { iterator } from "@thi.ng/transducers/iterator";
/**
 * Splits `src` into tokens at `delim` and optionally passes the tokens
 * through a chain of transducers.
 *
 * @param {string} src - Text to tokenize.
 * @param {Iterable<Function>} [xforms] - Transducers to apply to the token
 *   stream; they are combined with `comp()` and driven by `iterator()`.
 *   When omitted, `null` or empty, the raw tokens are returned as-is.
 * @param {RegExp} [delim] - Token delimiter. The default matches runs of
 *   spaces, tabs, line breaks and the characters `, ; : / ? ! ( ) [ ]`.
 * @param {boolean} [includeDelim=false] - Forwarded unchanged to `split()`
 *   (presumably controls whether delimiters are kept in the output —
 *   confirm against the `@thi.ng/strings` docs).
 * @returns {Iterable<*>} The result of `split()` when no transducers are
 *   given, otherwise the result of `iterator()`.
 */
const tokenize = (src, xforms, delim = /[ \t\n\r,;:/?!()\[\]]+/g, includeDelim = false) => {
  const $src = split(src, delim, includeDelim);
  // Materialize once so any iterable of transducers is accepted and an
  // empty collection can be detected reliably.
  const fns = xforms ? [...xforms] : [];
  // `comp()` needs at least one function; an empty list means "no transforms",
  // so hand back the raw tokens instead of failing.
  return fns.length > 0 ? iterator(comp(...fns), $src) : $src;
};
export {
  tokenize
};