UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities

21 lines (20 loc) 874 B
import { identity } from "@thi.ng/api/fn";
import { isIterable } from "@thi.ng/checks/is-iterable";
import { comp } from "@thi.ng/transducers/comp";
import { flatten1 } from "@thi.ng/transducers/flatten1";
import { iterator, iterator1 } from "@thi.ng/transducers/iterator";
import { join } from "@thi.ng/transducers/join";
import { keep } from "@thi.ng/transducers/keep";
import { map } from "@thi.ng/transducers/map";
import { multiplex } from "@thi.ng/transducers/multiplex";
import { partition } from "@thi.ng/transducers/partition";

/**
 * Builds n-grams from a stream of tokens by joining each group of `n`
 * tokens with `sep`.
 *
 * When `src` is iterable, the transducer is applied to it immediately and
 * the resulting iterator is returned. Otherwise the function returns the
 * composed transducer itself (`partition(n, 1)` followed by `join(sep)`).
 *
 * @param {number} n - Number of tokens per n-gram (partition size).
 * @param {string} [sep=" "] - Separator placed between the joined tokens.
 * @param {Iterable<string>} [src] - Optional token source.
 * @returns Iterator over the n-grams if `src` is iterable, else a transducer.
 */
function ngrams(n, sep = " ", src) {
  return isIterable(src)
    ? iterator1(ngrams(n, sep), src)
    : comp(partition(n, 1), join(sep));
}

/**
 * Like {@link ngrams}, but also passes the original tokens through, so the
 * output contains both the source tokens and the n-grams built from them.
 *
 * When `src` is iterable, the transducer is applied to it immediately and
 * the resulting iterator is returned. Otherwise the composed transducer is
 * returned.
 *
 * @param {number} n - Number of tokens per n-gram.
 * @param {string} [sep=" "] - Separator placed between the joined tokens.
 * @param {Iterable<string>} [src] - Optional token source.
 * @returns Iterator over tokens and n-grams if `src` is iterable, else a
 *   transducer.
 */
function withNgrams(n, sep = " ", src) {
  if (isIterable(src)) {
    // `iterator` (not `iterator1`): one input can emit more than one value
    // here (the token itself plus an n-gram).
    return iterator(withNgrams(n, sep), src);
  }
  return comp(
    // Run the pass-through and the n-gram transducer side by side.
    // `sep` must be forwarded here; otherwise a caller-supplied separator
    // is silently replaced by the default " ".
    multiplex(map(identity), ngrams(n, sep)),
    // Unpack each multiplexed tuple into individual values ...
    flatten1(),
    // ... and drop the slots where no n-gram was produced yet.
    keep()
  );
}

export { ngrams, withNgrams };