@thi.ng/text-analysis
Version:
Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities
22 lines (21 loc) • 698 B
JavaScript
import { frequencies as $freq } from "@thi.ng/transducers/frequencies";
import { normFrequenciesAuto as $norm } from "@thi.ng/transducers/norm-frequencies-auto";
import { sortedFrequencies as $sorted } from "@thi.ng/transducers/sorted-frequencies";
const frequencies = $freq;
const normFrequencies = $norm;
const sortedFrequencies = $sorted;
/**
 * Filters every document's words using a histogram computed over the whole
 * collection.
 *
 * All documents are flattened (one level) into a single list, which is handed
 * to `frequencies2` to obtain a lookup keyed by word. A word is kept only if
 * the lookup has an entry for it and `pred(word, freq)` returns a truthy
 * value. Words without an entry are dropped without calling `pred`.
 *
 * The input is not modified; a new outer array with newly filtered inner
 * arrays is returned, preserving document and word order.
 *
 * @param docs - array of documents, each itself an array of words
 * @param frequencies2 - function receiving the flattened word list and
 *   returning an object with a `get(word)` method (e.g. a `Map`)
 * @param pred - predicate called as `pred(word, freq)` for each word that has
 *   a histogram entry
 * @returns array of filtered documents, same length as `docs`
 */
const filterDocsFrequency = (docs, frequencies2, pred) => {
	const counts = frequencies2(docs.flat());
	// Shared per-word test: unknown words are rejected up front.
	const isKept = (token) => {
		const count = counts.get(token);
		if (count === undefined) {
			return false;
		}
		return pred(token, count);
	};
	return docs.map((tokens) => tokens.filter(isKept));
};
export {
filterDocsFrequency,
frequencies,
normFrequencies,
sortedFrequencies
};