UNPKG

@thi.ng/text-analysis

Version:

Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities

61 lines (60 loc) 1.69 kB
import { mapcat } from "@thi.ng/transducers/mapcat";

/**
 * Default English replacement table, mapping a lowercase contraction to the
 * sequence of words it expands to (e.g. `"don't"` => `["do", "not"]`).
 *
 * Keys are matched verbatim by {@link replaceWith}; no case folding or
 * apostrophe normalization is applied here.
 */
const DEFAULT_REPLACEMENTS_EN = {
  "can't": ["can", "not"],
  "couldn't": ["could", "not"],
  "didn't": ["did", "not"],
  "doesn't": ["does", "not"],
  "don't": ["do", "not"],
  "hadn't": ["had", "not"],
  "hasn't": ["has", "not"],
  "haven't": ["have", "not"],
  "he'd": ["he", "would"],
  "he'll": ["he", "will"],
  "he's": ["he", "is"],
  "how'd": ["how", "would"],
  "how're": ["how", "are"],
  "how's": ["how", "is"],
  "i'd": ["i", "would"],
  "i'll": ["i", "will"],
  "i'm": ["i", "am"],
  "i've": ["i", "have"],
  "isn't": ["is", "not"],
  "it'd": ["it", "would"],
  "it'll": ["it", "will"],
  "it's": ["it", "is"],
  "she'd": ["she", "would"],
  "she'll": ["she", "will"],
  "she's": ["she", "is"],
  "should've": ["should", "have"],
  "shouldn't": ["should", "not"],
  "they'd": ["they", "would"],
  "they'll": ["they", "will"],
  "they're": ["they", "are"],
  "they've": ["they", "have"],
  "wasn't": ["was", "not"],
  "we'd": ["we", "would"],
  "we'll": ["we", "will"],
  "we're": ["we", "are"],
  "we've": ["we", "have"],
  "weren't": ["were", "not"],
  "what'd": ["what", "would"],
  "what're": ["what", "are"],
  "what's": ["what", "is"],
  "who'd": ["who", "would"],
  "who're": ["who", "are"],
  "who's": ["who", "is"],
  "why'd": ["why", "would"],
  "why're": ["why", "are"],
  "why's": ["why", "is"],
  "won't": ["will", "not"],
  "wouldn't": ["would", "not"],
  "you'd": ["you", "would"],
  "you'll": ["you", "will"],
  "you're": ["you", "are"],
  "you've": ["you", "have"],
};

// Cached so the own-property check also works for dictionaries created via
// `Object.create(null)`, which have no `hasOwnProperty` method of their own.
const hasOwn = Object.prototype.hasOwnProperty;

/**
 * Builds a `mapcat` step which swaps each input for its replacement list
 * from `dict`, passing through (as a single-item list) any input without an
 * entry.
 *
 * Only the dictionary's OWN properties are consulted. A plain `dict[x]`
 * lookup would also resolve inherited `Object.prototype` members, so ordinary
 * words like `constructor` or `toString` would yield a function instead of a
 * word list.
 *
 * @param {Record<string, string[]>} [dict] - replacement table, defaults to
 *   {@link DEFAULT_REPLACEMENTS_EN}
 * @returns the result of `mapcat()` applied to the lookup function
 */
const replaceWith = (dict = DEFAULT_REPLACEMENTS_EN) =>
  mapcat((x) => {
    const replacement = hasOwn.call(dict, x) ? dict[x] : undefined;
    // own entries explicitly set to null/undefined still fall back to `[x]`
    return replacement ?? [x];
  });

export { DEFAULT_REPLACEMENTS_EN, replaceWith };