UNPKG

@botonic/plugin-contentful

Version:

## What Does This Plugin Do?

140 lines (124 loc) 3.9 kB
import { Locale } from './locales'
import { NormalizedUtterance, Normalizer, Word } from './normalizer'
import { SimilarWordFinder, SimilarWordResult } from './similar-words'

/**
 * A keyword, which may contain multiple words.
 * TODO consider storing as a list of new Token class instances, each with raw and stem fields
 */
export class Keyword {
  /** Trimmed, lowercase raw keyword */
  readonly raw: string

  /**
   * If hasOnlyStopWords == false, the stems of the non stop words (eg. buy a shirt => buy shirt).
   * Otherwise, it contains the tokens of the stop words (how are you => how are you)
   */
  readonly matchString: string

  /**
   * @param raw keyword text as provided; stored trimmed and lowercased in {@link raw}
   * @param words the words the keyword is made of
   * @param hasOnlyStopWords whether every word of the keyword is a stop word
   */
  constructor(
    raw: string,
    readonly words: Word[],
    readonly hasOnlyStopWords: boolean
  ) {
    if (hasOnlyStopWords) {
      // Nothing would be left after dropping stop words, so match on the tokens
      this.matchString = Word.joinedTokens(words, true)
    } else {
      this.matchString = words
        .filter(w => !w.isStopWord)
        .map(w => w.stem)
        .join(' ')
    }
    this.raw = raw.trim().toLowerCase()
  }

  /**
   * Builds a Keyword by normalizing rawKeyword with the given normalizer and locale.
   */
  static fromUtterance(
    rawKeyword: string,
    locale: Locale,
    normalizer: Normalizer
  ): Keyword {
    const normalized = normalizer.normalize(locale, rawKeyword)
    return new Keyword(
      rawKeyword,
      normalized.words,
      normalized.hasOnlyStopWords()
    )
  }

  /**
   * @returns this same instance wrapped in an array when it has exactly one word;
   * otherwise one new single-word Keyword per word (an empty array when there are no words)
   */
  splitInWords(): Keyword[] {
    if (this.words.length === 1) {
      return [this]
    }
    return this.words.map(w => new Keyword(w.token, [w], w.isStopWord))
  }

  /** Delegates to Word.joinedTokens with this keyword's words. */
  joinedTokens(withStopWords: boolean): string {
    return Word.joinedTokens(this.words, withStopWords)
  }
}

/** Associates an owner object with the keywords that should lead to it. */
export class CandidateWithKeywords<M> {
  constructor(
    readonly owner: M,
    readonly keywords: Keyword[]
  ) {}
}

export enum MatchType {
  /** After removing stop words, spaces and word endings, the input text must only contain the keywords */
  ONLY_KEYWORDS_FOUND,
  /** The keyword may be preceded and followed by other words */
  KEYWORDS_AND_OTHERS_FOUND,
  /** All the words in the keyword must appear on input text, even if mixed up with other words */
  ALL_WORDS_IN_KEYWORDS_MIXED_UP,
}

/**
 * All the MatchType members.
 *
 * MatchType is a numeric enum, so Object.values() also returns the member
 * names added by the reverse mapping. Only the numeric entries are real
 * MatchType values, hence the filter.
 */
export const MATCH_TYPES: MatchType[] = Object.values(MatchType).filter(
  (m): m is MatchType => typeof m === 'number'
)

/** How KeywordsParser orders its results. */
export enum SortType {
  /** Results are returned in the order produced by the finder */
  NONE,
  /** Results are ordered by descending match length */
  LENGTH,
}

/** Tuning options for {@link KeywordsParser}. */
export class KeywordsOptions {
  /**
   * @param maxDistance forwarded to SimilarWordFinder.find on each search
   * @param similarWordsMinMatchLength forwarded to the SimilarWordFinder constructor
   * @param resultsSortType how the results are ordered before being returned
   */
  constructor(
    readonly maxDistance = 1,
    readonly similarWordsMinMatchLength = 3,
    readonly resultsSortType = SortType.LENGTH
  ) {}
}

/**
 * Registers candidates with their keywords and finds which of them match
 * a normalized utterance, delegating the matching to SimilarWordFinder.
 */
export class KeywordsParser<M> {
  private readonly candidates = [] as CandidateWithKeywords<M>[]
  private readonly similar: SimilarWordFinder<M>

  constructor(
    readonly locale: Locale,
    readonly matchType: MatchType,
    readonly normalizer: Normalizer,
    readonly options: KeywordsOptions
  ) {
    // NOTE(review): the meaning of the first argument (true) is defined by
    // SimilarWordFinder, which is not visible here
    this.similar = new SimilarWordFinder<M>(
      true,
      options.similarWordsMinMatchLength
    )
  }

  /**
   *
   * @param candidate
   * @param rawKeywords a candidate may be associated to multiple keywords, and each one of them may contain multiple
   * words (which must appear together in the same order). The keywords will be stemmed.
   * @throws EmptyTextException
   */
  addCandidate(candidate: M, rawKeywords: string[]): void {
    const stemmedKeywords = rawKeywords.map(rawKeyword =>
      Keyword.fromUtterance(rawKeyword, this.locale, this.normalizer)
    )
    const candidateWithK = new CandidateWithKeywords(candidate, stemmedKeywords)
    this.candidates.push(candidateWithK)
    this.similar.addCandidate(candidateWithK)
  }

  /**
   * Searches the registered candidates using this parser's matchType and
   * options.maxDistance.
   *
   * @param utterance the normalized input text
   * @returns the finder results, ordered according to options.resultsSortType
   */
  findCandidatesWithKeywordsAt(
    utterance: NormalizedUtterance
  ): SimilarWordResult<M>[] {
    const results: SimilarWordResult<M>[] = this.similar.find(
      this.matchType,
      utterance,
      this.options.maxDistance
    )
    return this.sort(results)
  }

  /**
   * Orders results by descending match length, unless sorting is disabled,
   * in which case the input array is returned untouched.
   */
  private sort(results: SimilarWordResult<M>[]): SimilarWordResult<M>[] {
    if (this.options.resultsSortType === SortType.NONE) {
      return results
    }
    // Sort a copy so the array handed back by the finder is not mutated
    return [...results].sort((r1, r2) => r2.match.length - r1.match.length)
  }
}