@thi.ng/text-analysis
Version:
Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities
21 lines (20 loc) • 874 B
JavaScript
import { identity } from "@thi.ng/api/fn";
import { isIterable } from "@thi.ng/checks/is-iterable";
import { comp } from "@thi.ng/transducers/comp";
import { flatten1 } from "@thi.ng/transducers/flatten1";
import { iterator, iterator1 } from "@thi.ng/transducers/iterator";
import { join } from "@thi.ng/transducers/join";
import { keep } from "@thi.ng/transducers/keep";
import { map } from "@thi.ng/transducers/map";
import { multiplex } from "@thi.ng/transducers/multiplex";
import { partition } from "@thi.ng/transducers/partition";
/**
 * Builds n-grams from a sequence of tokens.
 *
 * Without an iterable `src`, returns a transducer composed of
 * `partition(n, 1)` (overlapping windows of `n` consecutive inputs,
 * advancing by one) followed by `join(sep)` (each window is joined into a
 * single string). With an iterable `src`, that transducer is applied to
 * `src` via `iterator1` and the resulting iterator is returned.
 *
 * @param {number} n - number of tokens per n-gram
 * @param {string} [sep=" "] - separator placed between the joined tokens
 * @param {Iterable<string>} [src] - optional input tokens
 * @returns a transducer if `src` is not iterable, else an iterator of n-grams
 */
function ngrams(n, sep = " ", src) {
  if (!isIterable(src)) {
    const windows = partition(n, 1);
    const glue = join(sep);
    return comp(windows, glue);
  }
  // Each input yields at most one n-gram, hence the single-result iterator.
  return iterator1(ngrams(n, sep), src);
}
/**
 * Like `ngrams`, but emits the original tokens as well as the n-grams.
 *
 * Without an iterable `src`, returns a transducer; with one, applies that
 * transducer to `src` via `iterator` and returns the resulting iterator.
 * For every input token the output contains the token itself, followed by
 * the n-gram completed by that token (once `n` tokens have been seen).
 *
 * @param {number} n - number of tokens per n-gram
 * @param {string} [sep=" "] - separator placed between the joined tokens
 * @param {Iterable<string>} [src] - optional input tokens
 * @returns a transducer if `src` is not iterable, else an iterator of
 *   tokens interleaved with n-grams
 */
function withNgrams(n, sep = " ", src) {
  return isIterable(src)
    ? iterator(withNgrams(n, sep), src)
    : comp(
        // Pair each token with the n-gram it completes. `sep` must be
        // forwarded here, otherwise n-grams are always joined with the
        // default separator regardless of what the caller passed.
        multiplex(map(identity), ngrams(n, sep)),
        // Unpack the [token, ngram] tuples into individual values...
        flatten1(),
        // ...and drop the nullish slots left where no n-gram was ready yet.
        keep()
      );
}
export {
ngrams,
withNgrams
};