UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities

106 lines (105 loc) 1.23 kB
import { filter } from "@thi.ng/transducers/filter";

/**
 * Default set of English stop words used by {@link removeStopWords} when no
 * custom set is supplied.
 *
 * All entries are lowercase. Lookups go through `Set.prototype.has`, which
 * matches exactly, so mixed-case tokens will not match unless the caller
 * normalizes them first.
 */
const DEFAULT_STOP_WORDS_EN = /* @__PURE__ */ new Set([
	"a", "above", "across", "after", "against", "along", "also", "among",
	"an", "and", "any", "are", "around", "at",
	"be", "been", "before", "being", "below", "beneath", "between", "beyond",
	"both", "but", "by",
	"could",
	"despite", "do", "does", "down", "during",
	"each", "eg", "etc", "even", "every",
	"few", "for", "from",
	"he",
	"i", "ie", "in", "inside", "into", "is", "it",
	"just",
	"less",
	"many", "may", "might", "more", "much", "must",
	"of", "on", "one", "onto", "or", "out", "outside", "over",
	"quite",
	"really",
	"several", "she", "should", "since", "so", "some", "such",
	"that", "the", "these", "they", "this", "those", "three", "through",
	"to", "too", "toward", "two",
	"under", "until", "up",
	"very",
	"was", "we", "were", "with", "within", "without", "would",
	"you",
]);

/**
 * Builds a filter step which drops every value contained in `words` and lets
 * all other values through.
 *
 * @param {Set<string>} [words=DEFAULT_STOP_WORDS_EN] - Collection of words to
 *   reject; only its `has()` method is used.
 * @returns The result of calling `filter()` from `@thi.ng/transducers` with a
 *   predicate that is true for values NOT present in `words`.
 */
const removeStopWords = (words = DEFAULT_STOP_WORDS_EN) => {
	const isKept = (token) => !words.has(token);
	return filter(isKept);
};

export { DEFAULT_STOP_WORDS_EN, removeStopWords };