minisearch-synonyms
Version:
Use MiniSearch with synonyms
238 lines (234 loc) • 7.5 kB
JavaScript
;
// Capture the Object reflection built-ins once, under short local aliases.
var {
  defineProperty: __defProp,
  defineProperties: __defProps,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyDescriptors: __getOwnPropDescs,
  getOwnPropertyNames: __getOwnPropNames,
  getOwnPropertySymbols: __getOwnPropSymbols
} = Object;
// Same for the two Object.prototype methods that are invoked through `.call`.
var {
  hasOwnProperty: __hasOwnProp,
  propertyIsEnumerable: __propIsEnum
} = Object.prototype;
// Store `value` under `key` on `obj`. When `key` is already reachable on `obj`
// (own or inherited, per the `in` operator) an own enumerable, configurable,
// writable data property is defined; otherwise a plain assignment is used.
// Returns `obj` in the first case and `value` in the second.
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return obj[key] = value;
};
// Copy the own enumerable properties of `b` (string keys first, then symbol
// keys) onto `a` and return `a`. A falsy `b` is treated as an empty object.
var __spreadValues = (a, b) => {
  if (!b) {
    b = {};
  }
  for (var name in b) {
    // for...in also visits inherited keys, so keep own ones only.
    if (__hasOwnProp.call(b, name)) {
      __defNormalProp(a, name, b[name]);
    }
  }
  if (__getOwnPropSymbols) {
    for (var sym of __getOwnPropSymbols(b)) {
      if (__propIsEnum.call(b, sym)) {
        __defNormalProp(a, sym, b[sym]);
      }
    }
  }
  return a;
};
// Define every own property of `b` on `a` using `b`'s property descriptors
// (so accessors stay accessors), then return `a`.
var __spreadProps = (a, b) => {
  const descriptors = __getOwnPropDescs(b);
  return __defProps(a, descriptors);
};
// For each enumerable key of `all`, define an enumerable getter with the same
// name on `target`; the getter is the function stored in `all` under that key.
var __export = (target, all) => {
  for (var exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
// Mirror the own properties of `from` onto `to` as getters that read through to
// `from`. Keys already present on `to`, and the key equal to `except`, are
// skipped. Each getter keeps the enumerability of the source property.
// `from` is only inspected when it is a non-null object or a function.
var __copyProps = (to, from, except, desc) => {
  const isObject = from && typeof from === "object";
  if (isObject || typeof from === "function") {
    __getOwnPropNames(from).forEach((key) => {
      if (__hasOwnProp.call(to, key) || key === except) {
        return;
      }
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
    });
  }
  return to;
};
// Build the CommonJS view of `mod`: a fresh object carrying a non-enumerable
// `__esModule: true` marker plus whatever __copyProps mirrors over from `mod`.
var __toCommonJS = (mod) => {
  const target = __defProp({}, "__esModule", { value: true });
  return __copyProps(target, mod);
};
// src/index.ts
// Export table for this module. __export installs one getter per name, so the
// bindings referenced here (declared further down the file) are only read when
// a consumer actually accesses the export.
var src_exports = {};
__export(src_exports, {
MiniSearchSynonyms: () => MiniSearchSynonyms,
default: () => src_default
});
// Publish the table as the CommonJS module value; __toCommonJS copies it onto a
// fresh object that is flagged with `__esModule`.
module.exports = __toCommonJS(src_exports);
// src/tokenize.ts
/**
 * Split a query into consecutive tokens, recognising known synonym phrases.
 *
 * The query is lower-cased and scanned left to right by a small state machine.
 * Every character ends up in exactly one token, so joining the `value` of all
 * tokens reproduces the lower-cased query.
 *
 * Token types:
 *  - "separator": a run of whitespace / punctuation characters;
 *  - "synonym":   text equal to one of `synonyms`, starting right after a
 *                 separator (or at the start) and followed by a separator or
 *                 the end of the query;
 *  - "word":      any other run of non-separator characters.
 *
 * @param {string} query - Raw query text.
 * @param {string[]} synonyms - Known synonym words/phrases. They are compared
 *   against the lower-cased query, so they should be lower-case. When several
 *   entries prefix the remaining text, the first one in array order is taken.
 * @returns {{type: string, position: number, length: number, value: string}[]}
 *   Tokens in order; `position` is an offset into the lower-cased query.
 */
function tokenize(query, synonyms) {
  const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]/u;
  const tokens = [];
  let remaining = `${query.toLowerCase()}`;
  let pos = 0;
  // Start in an empty separator state so a synonym at offset 0 is recognised.
  // Zero-length states are never pushed to the result.
  let state = { type: "separator", position: 0, length: 0, value: "" };
  while (remaining.length > 0) {
    let newState = void 0;
    // Number of characters consumed by this iteration.
    let forward = 1;
    if (state.type === "separator") {
      const foundWord = synonyms.find((word) => remaining.startsWith(word));
      if (foundWord) {
        newState = {
          type: "synonym",
          position: pos,
          length: foundWord.length,
          value: remaining.slice(0, foundWord.length)
        };
        forward = foundWord.length;
      } else if (SPACE_OR_PUNCTUATION.test(remaining[0])) {
        state.length += 1;
        state.value += remaining[0];
      } else {
        newState = {
          type: "word",
          position: pos,
          length: 1,
          value: remaining[0]
        };
      }
    } else if (state.type === "synonym") {
      if (SPACE_OR_PUNCTUATION.test(remaining[0])) {
        newState = {
          type: "separator",
          position: pos,
          length: 1,
          value: remaining[0]
        };
      } else {
        // The synonym is directly followed by more text. Try to upgrade it to
        // a longer synonym covering that text. Only strictly longer
        // candidates are valid: a shorter one would make `forward` negative,
        // and slicing with a negative offset would drop or duplicate input.
        const candidateText = state.value + remaining;
        const match = synonyms.find(
          (synonym) => synonym.length > state.length && candidateText.startsWith(synonym)
        );
        if (match) {
          forward = match.length - state.length;
          state.length = match.length;
          state.value = match;
        } else {
          // Not a standalone synonym after all: demote to an ordinary word.
          state.type = "word";
          state.length += 1;
          state.value += remaining[0];
        }
      }
    } else {
      if (SPACE_OR_PUNCTUATION.test(remaining[0])) {
        newState = {
          type: "separator",
          position: pos,
          length: 1,
          value: remaining[0]
        };
      } else {
        state.length += 1;
        state.value += remaining[0];
      }
    }
    if (newState) {
      if (state.length > 0) {
        tokens.push(state);
      }
      state = newState;
    }
    pos += forward;
    remaining = remaining.slice(forward);
  }
  if (state.length > 0) {
    tokens.push(state);
  }
  return tokens;
}
// src/index.ts
/**
 * Registry of synonym groups that can expand a search query into every
 * combination of its synonyms.
 *
 * All words are stored and compared in lower case.
 *
 * Instance state:
 *  - `groups`:  array of groups, each a sorted array of lower-case words;
 *  - `wordmap`: Map from each lower-case word to the group that contains it.
 */
var MiniSearchSynonyms = class {
  /**
   * @param {string[][]} [groups] - Initial synonym groups, each added through
   *   `addSynonyms` (and therefore subject to the same validation).
   */
  constructor(groups) {
    this.groups = [];
    this.wordmap = /* @__PURE__ */ new Map();
    if (groups) {
      for (const group of groups) {
        this.addSynonyms(group);
      }
    }
  }

  /**
   * Register a new group of mutually synonymous words.
   *
   * Validation is done on the lower-cased words, i.e. on the form that is
   * actually stored, so differently-cased spellings of one word count as the
   * same word. Nothing is modified when validation fails.
   *
   * @param {string[]} group - At least two words.
   * @throws {Error} If the group has fewer than 2 entries, if a word already
   *   belongs to another group, or if a word occurs twice in `group`.
   */
  addSynonyms(group) {
    if (group.length < 2) {
      throw new Error("Synonym must have at least 2 words");
    }
    const seen = new Set();
    for (const word of group) {
      const lowerCaseWord = word.toLowerCase();
      if (this.wordmap.has(lowerCaseWord)) {
        throw new Error(`Word \`${word}\` cannot be in multiple groups`);
      }
      if (seen.has(lowerCaseWord)) {
        throw new Error(`Duplicate synonym: \`${word}\``);
      }
      seen.add(lowerCaseWord);
    }
    const newGroup = [...seen].sort();
    this.groups.push(newGroup);
    for (const word of newGroup) {
      this.wordmap.set(word, newGroup);
    }
  }

  /**
   * Remove the whole group that contains `word` (case-insensitive).
   * Does nothing when the word is unknown.
   *
   * @param {string} word - Any member of the group to remove.
   */
  removeSynonyms(word) {
    const lowerCaseWord = word.toLowerCase();
    const groupIndex = this.groups.findIndex((group) => group.includes(lowerCaseWord));
    if (groupIndex >= 0) {
      for (const member of this.groups[groupIndex]) {
        this.wordmap.delete(member);
      }
      this.groups.splice(groupIndex, 1);
    }
  }

  /**
   * Look up the synonyms of a word.
   *
   * @param {string} word - Word to look up (case-insensitive).
   * @returns {string[]} The other lower-case members of the word's group, or
   *   an empty array when the word is unknown.
   */
  getSynonyms(word) {
    const lowerCaseWord = word.toLowerCase();
    const synonyms = this.wordmap.get(lowerCaseWord) || [];
    return synonyms.filter((synonym) => synonym !== lowerCaseWord);
  }

  /**
   * Expand a query into one query per combination of synonyms.
   *
   * @param {string} query - Query text.
   * @returns {string | {combineWith: "OR", queries: string[]}} The query
   *   unchanged when it is empty or contains no known synonym; otherwise an
   *   object listing every variant. Variants are rebuilt from the tokens, so
   *   they are lower-cased and keep the original separators.
   */
  expandQuery(query) {
    if (!query || query.length === 0) {
      return query;
    }
    const tokens = tokenize(query, [...this.wordmap.keys()]);
    // Token index (as a string key) -> alternatives for that token, with the
    // word as written in the query first.
    const synonymOptions = {};
    for (let i = 0; i < tokens.length; i++) {
      const token = tokens[i];
      if (token.type === "synonym") {
        const word = token.value.toLowerCase();
        const synonyms = this.getSynonyms(word);
        if (synonyms.length > 0) {
          synonymOptions[i.toString()] = [word, ...synonyms];
        }
      }
    }
    const synonymCombinations = this.generateKeywordCombinations(synonymOptions, []);
    const queries = synonymCombinations.map((combination) => {
      return tokens.map((token, i) => {
        const iStr = i.toString();
        return synonymOptions[iStr] ? combination[iStr] : token.value;
      }).join("");
    });
    if (queries.length === 0) {
      return query;
    }
    return {
      combineWith: "OR",
      queries
    };
  }

  /**
   * Recursively build the cartesian product of the options of every key.
   *
   * @param {Object<string, string[]>} synonymOptions - Options still to be
   *   combined, keyed by token index.
   * @param {Object<string, string>[]} combinations - Partial combinations
   *   built so far (pass `[]` to start).
   * @returns {Object<string, string>[]} One object per combination, mapping
   *   each key to the option chosen for it.
   */
  generateKeywordCombinations(synonymOptions, combinations) {
    const keys = Object.keys(synonymOptions);
    if (keys.length === 0) {
      return combinations;
    }
    const key = keys[0];
    const newCombinations = [];
    if (combinations.length === 0) {
      for (const option of synonymOptions[key]) {
        newCombinations.push({ [key]: option });
      }
    } else {
      for (const option of synonymOptions[key]) {
        for (const combination of combinations) {
          newCombinations.push(Object.assign({}, combination, { [key]: option }));
        }
      }
    }
    // Work on a copy so the caller's options object is left untouched.
    const remainingOptions = Object.assign({}, synonymOptions);
    delete remainingOptions[key];
    return this.generateKeywordCombinations(remainingOptions, newCombinations);
  }
};
// Default export: the same class that is also exported by name.
var src_default = MiniSearchSynonyms;
// Annotate the CommonJS export names for ESM import in node:
// (`0 &&` short-circuits, so the assignment below is never executed at runtime;
// it is only present as text.)
0 && (module.exports = {
MiniSearchSynonyms
});