UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities

11 lines (10 loc) 372 B
import { split } from "@thi.ng/strings";
import { comp } from "@thi.ng/transducers/comp";
import { iterator } from "@thi.ng/transducers/iterator";

/**
 * Splits `src` into tokens at `delim` and, if any transducers are given,
 * passes the tokens through their composition.
 *
 * @param src - Text to tokenize; handed to `split()` unchanged.
 * @param xforms - Optional collection of transducers. When it contains at
 *   least one entry, the entries are combined with `comp()` and applied to
 *   the tokens via `iterator()`. When omitted or empty, the tokens produced
 *   by `split()` are returned as-is.
 * @param delim - Delimiter forwarded to `split()`. The default matches runs
 *   of one or more of: space, tab, newline, carriage return, and the
 *   characters `, ; : / ? ! ( ) [ ]`.
 * @param includeDelim - Forwarded to `split()` as its third argument
 *   (default `false`).
 * @returns Either the result of `split()` (no transducers) or the result of
 *   `iterator()` over the composed transducers.
 */
const tokenize = (
  src,
  xforms,
  delim = /[ \t\n\r,;:/?!()\[\]]+/g,
  includeDelim = false
) => {
  const tokens = split(src, delim, includeDelim);
  // Materialize once so any iterable accepted by the original spread is still
  // accepted, and so emptiness can be checked reliably.
  const fns = xforms ? [...xforms] : [];
  // An empty list would otherwise call `comp()` with zero functions, which is
  // not a meaningful composition.
  // NOTE(review): @thi.ng/compose appears to raise an illegal-arity error in
  // that case — confirm against the library source.
  return fns.length > 0 ? iterator(comp(...fns), tokens) : tokens;
};

export { tokenize };