UNPKG

minisearch-synonyms

Version:
238 lines (234 loc) 7.5 kB
"use strict";

// ---------------------------------------------------------------------------
// Bundler-generated interop helpers (object spread and CommonJS export wiring).
// They are kept token-for-token; only the layout has been expanded.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __defProps = Object.defineProperties;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getOwnPropSymbols = Object.getOwnPropertySymbols;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __propIsEnum = Object.prototype.propertyIsEnumerable;

// Sets `key` on `obj`: redefines it as a plain data property when the key is
// already visible on the object, otherwise uses ordinary assignment.
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;

// Equivalent of `{ ...a, ...b }` performed in place on `a` (own string keys
// plus enumerable symbol keys of `b`).
var __spreadValues = (a, b) => {
  for (var prop in b || (b = {}))
    if (__hasOwnProp.call(b, prop))
      __defNormalProp(a, prop, b[prop]);
  if (__getOwnPropSymbols)
    for (var prop of __getOwnPropSymbols(b)) {
      if (__propIsEnum.call(b, prop))
        __defNormalProp(a, prop, b[prop]);
    }
  return a;
};

// Copies the property descriptors of `b` onto `a`.
var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));

// Defines one enumerable getter on `target` per entry of `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// Re-exposes the own properties of `from` on `to` through getters, skipping
// keys that `to` already owns and the key named by `except`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};

// Wraps a namespace object in a fresh object flagged with `__esModule`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var src_exports = {};
__export(src_exports, {
  MiniSearchSynonyms: () => MiniSearchSynonyms,
  default: () => src_default
});
// Guarded so that evaluating this file where no CommonJS `module` binding
// exists does not throw; under CommonJS the assignment is unchanged.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(src_exports);
}

// src/tokenize.ts
/**
 * Splits a query into consecutive tokens of type "separator", "synonym" or
 * "word". The query is lower-cased first, so token values and positions refer
 * to the lower-cased text. Concatenating the `value` of every returned token
 * yields the lower-cased query again.
 *
 * @param {string} query - Text to tokenize.
 * @param {string[]} synonyms - Known synonym terms; they are compared against
 *   the lower-cased query, so they are expected to be lower-case already.
 * @returns {{type: string, position: number, length: number, value: string}[]}
 *   Tokens in order of appearance.
 */
function tokenize(query, synonyms) {
  const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]/u;
  const isSeparator = (char) => SPACE_OR_PUNCTUATION.test(char);
  const tokens = [];
  let remaining = query.toLowerCase();
  let pos = 0;
  let state = { type: "separator", position: 0, length: 0, value: "" };
  while (remaining.length > 0) {
    let newState = void 0;
    let forward = 1;
    if (state.type === "separator") {
      // A synonym may only start right after a separator (or at the very
      // beginning). The first matching entry in list order wins, not the
      // longest. Empty entries are skipped: they would match everywhere and
      // hide every later synonym.
      const foundWord = synonyms.find((word) => word.length > 0 && remaining.startsWith(word));
      if (foundWord) {
        newState = {
          type: "synonym",
          position: pos,
          length: foundWord.length,
          value: remaining.slice(0, foundWord.length)
        };
        forward = foundWord.length;
      } else if (isSeparator(remaining[0])) {
        state.length += 1;
        state.value += remaining[0];
      } else {
        newState = { type: "word", position: pos, length: 1, value: remaining[0] };
      }
    } else if (state.type === "synonym") {
      if (isSeparator(remaining[0])) {
        newState = { type: "separator", position: pos, length: 1, value: remaining[0] };
      } else {
        // The matched synonym is immediately followed by more characters.
        // Either a longer synonym starts at the same position, or the text is
        // just an ordinary word that happens to begin with a synonym.
        // Only strictly longer candidates are accepted: a shorter one would
        // make `forward` negative, rewind the cursor and corrupt the token.
        const previousRemaining = state.value + remaining;
        const match = synonyms.find(
          (synonym) => synonym.length > state.length && previousRemaining.startsWith(synonym)
        );
        if (match) {
          forward = match.length - state.length;
          state.length = match.length;
          state.value = match;
        } else {
          state.type = "word";
          state.length += 1;
          state.value += remaining[0];
        }
      }
    } else {
      if (isSeparator(remaining[0])) {
        newState = { type: "separator", position: pos, length: 1, value: remaining[0] };
      } else {
        state.length += 1;
        state.value += remaining[0];
      }
    }
    if (newState) {
      // The initial zero-length separator is never emitted.
      if (state.length > 0) {
        tokens.push(state);
      }
      state = newState;
    }
    pos += forward;
    remaining = remaining.slice(forward);
  }
  if (state.length > 0) {
    tokens.push(state);
  }
  return tokens;
}

// src/index.ts
/**
 * Registry of synonym groups that can expand a query string into every
 * combination of its synonyms.
 *
 * `groups` holds each group as a sorted array of lower-cased words, and
 * `wordmap` maps every lower-cased word to the group array that contains it.
 */
var MiniSearchSynonyms = class {
  /**
   * @param {string[][]} [groups] - Initial synonym groups; each one is passed
   *   to `addSynonyms`.
   */
  constructor(groups) {
    this.groups = [];
    this.wordmap = /* @__PURE__ */ new Map();
    if (groups) {
      for (const group of groups) {
        this.addSynonyms(group);
      }
    }
  }

  /**
   * Registers a new group of mutually synonymous words. Words are stored
   * lower-cased, and validation is performed on the lower-cased form so that
   * the stored data cannot contain duplicates or overlapping groups.
   *
   * @param {string[]} group - At least two distinct words.
   * @throws {Error} If the group has fewer than two words, if a word already
   *   belongs to another group, or if a word occurs twice in `group`.
   */
  addSynonyms(group) {
    if (group.length < 2) {
      throw new Error("Synonym must have at least 2 words");
    }
    const uniqueWords = /* @__PURE__ */ new Set();
    for (const word of group) {
      const lowerCaseWord = word.toLowerCase();
      if (this.wordmap.has(lowerCaseWord)) {
        throw new Error(`Word \`${word}\` cannot be in multiple groups`);
      }
      if (uniqueWords.has(lowerCaseWord)) {
        throw new Error(`Duplicate synonym: \`${word}\``);
      }
      uniqueWords.add(lowerCaseWord);
    }
    // Nothing is stored until the whole group has been validated.
    const newGroup = [...uniqueWords].sort();
    this.groups.push(newGroup);
    for (const word of newGroup) {
      this.wordmap.set(word, newGroup);
    }
  }

  /**
   * Removes the whole group that contains `word` (case-insensitive). Does
   * nothing when the word is not registered.
   *
   * @param {string} word - Any member of the group to remove.
   */
  removeSynonyms(word) {
    const lowerCaseWord = word.toLowerCase();
    const groupIndex = this.groups.findIndex((group) => group.includes(lowerCaseWord));
    if (groupIndex >= 0) {
      const group = this.groups[groupIndex];
      for (const w of group) {
        this.wordmap.delete(w);
      }
      this.groups.splice(groupIndex, 1);
    }
  }

  /**
   * Returns the other members of the group that contains `word`.
   *
   * @param {string} word - Word to look up (case-insensitive).
   * @returns {string[]} Lower-cased synonyms excluding `word` itself; empty
   *   when the word is not registered.
   */
  getSynonyms(word) {
    const lowerCaseWord = word.toLowerCase();
    const synonyms = this.wordmap.get(lowerCaseWord) || [];
    return synonyms.filter((synonym) => synonym !== lowerCaseWord);
  }

  /**
   * Expands a query into all variants obtained by substituting each
   * recognised synonym with every member of its group.
   *
   * @param {string} query - Query text.
   * @returns {string | {combineWith: string, queries: string[]}} The query
   *   itself when it is empty or contains no registered synonym; otherwise an
   *   object with `combineWith: "OR"` and the list of lower-cased variants.
   */
  expandQuery(query) {
    if (!query || query.length === 0) {
      return query;
    }
    const tokens = tokenize(query, [...this.wordmap.keys()]);
    // Maps a token index (as a string) to the alternatives for that token,
    // with the word that actually appeared in the query listed first.
    const synonymOptions = {};
    for (let i = 0; i < tokens.length; i++) {
      const token = tokens[i];
      if (token.type === "synonym") {
        const word = token.value.toLowerCase();
        const synonyms = this.getSynonyms(word);
        if (synonyms.length > 0) {
          synonymOptions[i.toString()] = [word, ...synonyms];
        }
      }
    }
    const synonymCombinations = this.generateKeywordCombinations(synonymOptions, []);
    const queries = synonymCombinations.map((combination) => {
      return tokens.map((token, i) => {
        const iStr = i.toString();
        return synonymOptions[iStr] ? combination[iStr] : token.value;
      }).join("");
    });
    if (queries.length === 0) {
      return query;
    }
    return {
      combineWith: "OR",
      queries
    };
  }

  /**
   * Builds the cartesian product of the alternatives in `synonymOptions`,
   * consuming one key per recursion step.
   *
   * @param {Object<string, string[]>} synonymOptions - Alternatives per key.
   * @param {Object<string, string>[]} combinations - Partial combinations
   *   built so far; pass `[]` to start.
   * @returns {Object<string, string>[]} One object per combination, mapping
   *   every key to the chosen alternative.
   */
  generateKeywordCombinations(synonymOptions, combinations) {
    const keys = Object.keys(synonymOptions);
    if (keys.length === 0) {
      return combinations;
    }
    const key = keys[0];
    const newCombinations = [];
    if (Object.keys(combinations).length === 0) {
      for (const option of synonymOptions[key]) {
        newCombinations.push({ [key]: option });
      }
    } else {
      for (const option of synonymOptions[key]) {
        for (const combination of combinations) {
          newCombinations.push(__spreadProps(__spreadValues({}, combination), { [key]: option }));
        }
      }
    }
    const remainingOptions = __spreadValues({}, synonymOptions);
    delete remainingOptions[key];
    return this.generateKeywordCombinations(remainingOptions, newCombinations);
  }
};
var src_default = MiniSearchSynonyms;
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  MiniSearchSynonyms
});