UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities

22 lines (21 loc) 698 B
import { frequencies as $freq } from "@thi.ng/transducers/frequencies";
import { normFrequenciesAuto as $norm } from "@thi.ng/transducers/norm-frequencies-auto";
import { sortedFrequencies as $sorted } from "@thi.ng/transducers/sorted-frequencies";

/** Alias of `frequencies` from `@thi.ng/transducers/frequencies`. */
const frequencies = $freq;

/** Alias of `normFrequenciesAuto` from `@thi.ng/transducers/norm-frequencies-auto`. */
const normFrequencies = $norm;

/** Alias of `sortedFrequencies` from `@thi.ng/transducers/sorted-frequencies`. */
const sortedFrequencies = $sorted;

/**
 * Filters the words of every document based on a histogram computed over
 * the words of all documents combined.
 *
 * The histogram is built once by passing the one-level-flattened `docs`
 * to `frequencies2`. A word is kept only if the histogram has an entry for
 * it and `pred(word, entry)` returns a truthy value.
 *
 * @param {Array<Array<*>>} docs - Array of documents, each an array of words.
 * @param {Function} frequencies2 - Called with the flattened word array; its
 *   result must provide a `.get(word)` lookup (presumably a Map of
 *   word => frequency, e.g. as produced by the functions above - confirm
 *   against callers).
 * @param {Function} pred - Called as `pred(word, freq)`; a truthy result
 *   keeps the word.
 * @returns {Array<Array<*>>} New array with one filtered word array per
 *   input document, in the original order.
 */
const filterDocsFrequency = (docs, frequencies2, pred) => {
  const histogram = frequencies2(docs.flat());

  // Decides whether a single word survives the filter.
  const isKept = (word) => {
    const count = histogram.get(word);
    if (count === undefined) {
      // No histogram entry for this word: drop it without consulting `pred`.
      return false;
    }
    return pred(word, count);
  };

  return docs.map((doc) => doc.filter((word) => isKept(word)));
};

export {
  filterDocsFrequency,
  frequencies,
  normFrequencies,
  sortedFrequencies
};