@thi.ng/text-analysis
Version:
Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities
106 lines (105 loc) • 1.23 kB
JavaScript
import { filter } from "@thi.ng/transducers/filter";
/**
 * Default set of English stop words (96 unique entries, all lowercase).
 *
 * Membership checks via `Set.prototype.has` are exact string matches, so
 * tokens need to be lowercased by the caller before being tested against
 * this set.
 *
 * The `@__PURE__` annotation marks the construction as side-effect free so
 * bundlers may drop it when the constant is unused.
 */
const DEFAULT_STOP_WORDS_EN = /* @__PURE__ */ new Set([
  "a",
  "above",
  "across",
  "after",
  "against",
  "along",
  "also",
  "among",
  "an",
  "and",
  "any",
  "are",
  "around",
  "at",
  "be",
  "been",
  "before",
  "being",
  "below",
  "beneath",
  "between",
  "beyond",
  "both",
  "but",
  "by",
  "could",
  "despite",
  "do",
  "does",
  "down",
  "during",
  "each",
  "eg",
  "etc",
  "even",
  "every",
  "few",
  "for",
  "from",
  "he",
  "i",
  "ie",
  "in",
  "inside",
  "into",
  "is",
  "it",
  "just",
  "less",
  "many",
  "may",
  "might",
  "more",
  "much",
  "must",
  "of",
  "on",
  "one",
  "onto",
  "or",
  "out",
  "outside",
  "over",
  "quite",
  "really",
  "several",
  "she",
  "should",
  "since",
  "so",
  "some",
  "such",
  "that",
  "the",
  "these",
  "they",
  "this",
  "those",
  "three",
  "through",
  "to",
  "too",
  "toward",
  "two",
  "under",
  "until",
  "up",
  "very",
  "was",
  "we",
  "were",
  "with",
  "within",
  "without",
  "would",
  "you"
]);
/**
 * Builds a `filter` step which drops every value contained in `words` and
 * keeps everything else.
 *
 * Matching relies on `words.has(x)`, i.e. exact (case-sensitive) lookup.
 *
 * @param words - collection exposing a `has()` method (e.g. a `Set` of
 * strings); defaults to {@link DEFAULT_STOP_WORDS_EN}
 * @returns the result of calling `filter` with the "not a stop word"
 * predicate (presumably a transducer — confirm against
 * `@thi.ng/transducers`)
 */
const removeStopWords = (words = DEFAULT_STOP_WORDS_EN) => {
  const isNotStopWord = (token) => !words.has(token);
  return filter(isNotStopWord);
};
// Named exports of this module: the default English stop word set and the
// stop word removal helper defined above.
export {
DEFAULT_STOP_WORDS_EN,
removeStopWords
};