@thi.ng/text-analysis
Version:
Text tokenization, transformation & analysis transducers, utilities, stop words, Porter stemming, vector encodings, similarities
11 lines (10 loc) • 372 B
JavaScript
import { split } from "@thi.ng/strings";
import { comp } from "@thi.ng/transducers/comp";
import { iterator } from "@thi.ng/transducers/iterator";
/**
 * Splits `src` into tokens and optionally passes them through transducers.
 *
 * `src`, `delim` and `includeDelim` are forwarded as-is to `split()`. When
 * `xforms` holds at least one transducer, the transducers are combined with
 * `comp()` and applied to the tokens via `iterator()`. When `xforms` is
 * omitted or empty, the result of `split()` is returned unchanged.
 *
 * @param src - Text to tokenize (presumably a string; it is only ever
 *   passed on to `split()`).
 * @param xforms - Optional array of transducers to apply to the tokens.
 * @param delim - Delimiter handed to `split()`. The default matches runs of
 *   one or more spaces, tabs, newlines, carriage returns or any of the
 *   characters `,;:/?!()[]`.
 * @param includeDelim - Forwarded to `split()`, defaults to `false`.
 *   NOTE(review): presumably controls whether delimiters are kept in the
 *   emitted tokens; confirm against @thi.ng/strings.
 * @returns Either the value produced by `split()` or the value produced by
 *   `iterator()` over it.
 */
const tokenize = (src, xforms, delim = /[ \t\n\r,;:/?!()\[\]]+/g, includeDelim = false) => {
  const tokens = split(src, delim, includeDelim);
  // An empty array is truthy, but `comp()` must not be called without any
  // functions (it reports an illegal arity), so treat "no transducers" the
  // same as "xforms not given" and hand back the raw tokens.
  if (!xforms || xforms.length === 0) {
    return tokens;
  }
  return iterator(comp(...xforms), tokens);
};
// Expose `tokenize` as a named export of this module.
export {
tokenize
};