UNPKG

wn-ts-node

Version:
1,164 lines 60.8 kB
import { sql as x, SqliteDialect as v, Kysely as W } from "kysely"; import "./logger-ClUC0kzz.js"; async function _(r, e, s, n = 150) { if (!s || s.length === 0) return; const t = Math.ceil(s.length / n); console.log(`Starting batch insert for table ${String(e)}: ${s.length} records in ${t} chunks of ${n}`); for (let i = 0; i < s.length; i += n) { const o = s.slice(i, i + n), l = Math.floor(i / n) + 1; if (o.length > 0) { console.log(`Inserting chunk ${l}/${t} for table ${String(e)}: ${o.length} records`); const c = new Promise((a, u) => { setTimeout(() => u(new Error(`Batch insert timeout after 120 seconds for table ${String(e)} chunk ${l}`)), 12e4); }), f = r.insertInto(e).values(o).onConflict((a) => a.column("id").doNothing()).execute(); await Promise.race([f, c]), console.log(`Completed chunk ${l}/${t} for table ${String(e)}`); } } console.log(`Completed batch insert for table ${String(e)}: ${s.length} records`); } function $(r, e, s) { return r.selectFrom("senses").innerJoin("words", "senses.word_id", "words.id").selectAll("words").where("senses.synset_id", "=", e).$if(!!s, (n) => n.where("words.language", "=", s)); } function E(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, searchAllForms: o = !1, fuzzy: l = !1, maxResults: c, includeInflected: f = !1 } = e; let a = r.selectFrom("words").distinct().selectAll("words"); if (t && t !== "*" && (Array.isArray(t) ? t.length > 0 && (a = a.where("words.lexicon", "in", t)) : a = a.where(x`words.lexicon`, "=", t)), i && i !== "*" && (a = a.where("words.language", "=", i)), n && (a = a.where(x`words.pos`, "=", n)), s) { const u = l ? `%${s.toLowerCase()}%` : s.toLowerCase(); o || f ? (a = a.leftJoin("forms", "words.id", "forms.word_id"), l ? a = a.where( (w) => w.or([ w(x`lower(words.lemma)`, "like", u), w(x`lower(forms.written_form)`, "like", u) ]) ) : a = a.where( (w) => w.or([ w(x`lower(words.lemma)`, "=", u), w(x`lower(forms.written_form)`, "=", u) ]) )) : l ? a = a.where(x`lower(words.lemma)`, "like", u) : a = a.where(x`lower(words.lemma)`, "=", u); } return c && (a = a.limit(c)), a.orderBy("words.lemma").orderBy("words.pos"); } function N(r, e) { return r.selectFrom("words").selectAll().where("id", "=", e); } function z(r, e, s = {}) { return r.selectFrom("words").selectAll("words").where("lemma", "=", e.toLowerCase()).$if(!!s.pos, (n) => n.where("pos", "=", s.pos)).$if(!!s.lexicon, (n) => n.where("lexicon", "=", s.lexicon)).$if(!!s.maxResults, (n) => n.limit(s.maxResults)).orderBy("lemma").orderBy("pos"); } function J(r, e, s = {}) { const n = `%${e.toLowerCase()}%`; return r.selectFrom("words").selectAll("words").where("lemma", "like", n).$if(!!s.pos, (t) => t.where("pos", "=", s.pos)).$if(!!s.lexicon, (t) => t.where("lexicon", "=", s.lexicon)).$if(!!s.maxResults, (t) => t.limit(s.maxResults)).orderBy("lemma").orderBy("pos"); } function Q(r, e) { return r.selectFrom("words").selectAll().where("lexicon", "=", e); } function T(r, e) { return !e || e.length === 0 ? r.selectFrom("words").selectAll().where("id", "=", "") : r.selectFrom("words").selectAll().where("id", "in", e); } function k(r, e, s) { return r.selectFrom("senses").innerJoin("words", "senses.word_id", "words.id").innerJoin("synsets", "senses.synset_id", "synsets.id").selectAll("words").where("synsets.ili", "=", e).$if(!!s, (n) => n.where("words.language", "=", s)); } function M(r, e, s) { return r.selectFrom("senses").innerJoin("words", "senses.word_id", "words.id").innerJoin("synsets", "senses.synset_id", "synsets.id").selectAll("words").where("synsets.ili", "=", e).where("words.lexicon", "like", `${s}%`); } function F(r, e = {}) { const { wordIdOrForm: s, pos: n, lexicon: t } = e; let i = r.selectFrom("senses").selectAll("senses"), o = !!(s && !s.includes(".")) || n || t && t !== "*"; return o && (i = i.innerJoin("words", "senses.word_id", "words.id")), s && (s.includes("-") && (s.endsWith("-n") || s.endsWith("-v") || s.endsWith("-a") || s.endsWith("-r") || s.endsWith("-s") || s.endsWith("-c") || s.endsWith("-p") || s.endsWith("-i") || s.endsWith("-x") || s.endsWith("-u")) ? i = i.where("senses.word_id", "=", s) : (o || (i = i.innerJoin("words", "senses.word_id", "words.id"), o = !0), i = i.where(x`words.lemma`, "=", s.toLowerCase()))), n && (o || (i = i.innerJoin("words", "senses.word_id", "words.id"), o = !0), i = i.where(x`words.pos`, "=", n)), t && t !== "*" && (o || (i = i.innerJoin("words", "senses.word_id", "words.id"), o = !0), i = i.where(x`words.lexicon`, "=", t)), i; } function O(r, e) { return r.selectFrom("senses").selectAll().where("id", "=", e); } function P(r, e) { return r.selectFrom("senses").selectAll().where("word_id", "=", e); } function K(r, e) { return r.selectFrom("senses").selectAll().where("synset_id", "=", e); } function V(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").selectAll("synsets").$if(!!s, (a) => s ? a.leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()).distinct() : a).$if(!!n, (a) => s ? a.where( "synsets.id", "in", r.selectFrom("senses").leftJoin("words", "words.id", "senses.word_id").select("senses.synset_id").where("words.pos", "=", n) ) : a.leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").where("words.pos", "=", n).distinct()).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function U(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").selectAll("synsets").$if(!!s, (a) => s ? a.leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()).distinct() : a).$if(!!n, (a) => s ? a.where( "synsets.id", "in", r.selectFrom("senses").leftJoin("words", "words.id", "senses.word_id").select("senses.synset_id").where("words.pos", "=", n) ) : a.leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").where("words.pos", "=", n).distinct()).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function G(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").leftJoin("definitions", "definitions.synset_id", "synsets.id").leftJoin("examples", "examples.synset_id", "synsets.id").leftJoin("relations", "relations.source_id", "synsets.id").leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").select([ "synsets.id as synset_id", "synsets.pos as synset_pos", "synsets.language as synset_language", "synsets.lexicon as synset_lexicon", "synsets.ili as synset_ili", "definitions.id as def_id", "definitions.language as def_language", "definitions.text as def_text", "definitions.source as def_source", "examples.id as ex_id", "examples.language as ex_language", "examples.text as ex_text", "examples.source as ex_source", "relations.id as rel_id", "relations.type as rel_type", "relations.target_id as rel_target", "relations.source as rel_source", "senses.id as sense_id", "senses.word_id as sense_word_id" ]).distinct().$if(!!s, (a) => s ? a.where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()) : a).$if(!!n, (a) => a.where("words.pos", "=", n)).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function j(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").leftJoin("definitions", "definitions.synset_id", "synsets.id").leftJoin("examples", "examples.synset_id", "synsets.id").leftJoin("relations", "relations.source_id", "synsets.id").leftJoin("senses", "senses.synset_id", "synsets.id").leftJoin("words", "words.id", "senses.word_id").select([ "synsets.id as synset_id", "synsets.pos as synset_pos", "synsets.language as synset_language", "synsets.lexicon as synset_lexicon", "synsets.ili as synset_ili", "definitions.id as def_id", "definitions.language as def_language", "definitions.text as def_text", "definitions.source as def_source", "examples.id as ex_id", "examples.language as ex_language", "examples.text as ex_text", "examples.source as ex_source", "relations.id as rel_id", "relations.type as rel_type", "relations.target_id as rel_target", "relations.source as rel_source", "senses.id as sense_id", "senses.word_id as sense_word_id" ]).distinct().$if(!!s, (a) => s ? a.where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()) : a).$if(!!n, (a) => a.where("words.pos", "=", n)).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function H(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").selectAll("synsets").$if(!!s, (a) => s ? a.innerJoin("senses", "senses.synset_id", "synsets.id").innerJoin("words", "words.id", "senses.word_id").where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()).distinct() : a).$if(!!n, (a) => a.innerJoin("senses", "senses.synset_id", "synsets.id").innerJoin("words", "words.id", "senses.word_id").where("words.pos", "=", n).distinct()).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function Y(r, e = {}) { const { form: s, pos: n, lexicon: t, language: i, ili: o, fuzzy: l = !1, maxResults: c } = e; return r.selectFrom("synsets").selectAll("synsets").$if(!!s, (a) => s ? a.innerJoin("senses", "senses.synset_id", "synsets.id").innerJoin("words", "words.id", "senses.word_id").where("words.lemma", l ? "like" : "=", l ? `%${s.toLowerCase()}%` : s.toLowerCase()).distinct() : a).$if(!!n, (a) => a.innerJoin("senses", "senses.synset_id", "synsets.id").innerJoin("words", "words.id", "senses.word_id").where("words.pos", "=", n).distinct()).$if(!!t && t !== "*", (a) => Array.isArray(t) ? t.length > 0 ? a.where("synsets.lexicon", "in", t) : a : a.where("synsets.lexicon", "=", t)).$if(!!i, (a) => a.where("synsets.language", "=", i)).$if(!!o, (a) => a.where("synsets.ili", "=", o)).$if(!!c, (a) => a.limit(c)).orderBy("synsets.id"); } function Z(r, e) { return r.selectFrom("synsets").selectAll().where("id", "=", e); } function X(r, e, s = {}) { return r.selectFrom("synsets").innerJoin("senses", "senses.synset_id", "synsets.id").innerJoin("words", "words.id", "senses.word_id").selectAll("synsets").where("words.lemma", "=", e.toLowerCase()).distinct().$if(!!s.pos, (n) => n.where("words.pos", "=", s.pos)).$if(!!s.lexicon, (n) => n.where("synsets.lexicon", "=", s.lexicon)).$if(!!s.maxResults, (n) => n.limit(s.maxResults)).orderBy("synsets.id"); } function ee(r, e) { return r.selectFrom("synsets").selectAll().where("lexicon", "=", e); } function se(r, e) { return r.selectFrom("definitions").selectAll().where("synset_id", "=", e); } function te(r, e = {}) { return r.selectFrom("lexicons").selectAll().$if(!!e.id && e.id !== "*", (s) => s.where("id", "=", e.id)).$if(!!e.ids && e.ids.length > 0, (s) => s.where("id", "in", e.ids)).$if(!!e.language, (s) => s.where("language", "=", e.language)).$if(!!e.version, (s) => s.where("version", "=", e.version)); } function ne(r, e) { return r.selectFrom("lexicons").selectAll().where("id", "=", e); } function ie(r, e) { return r.selectFrom("ilis").selectAll().where("id", "=", e); } function re(r, e = {}) { return r.selectFrom("ilis").selectAll().$if(!!e.status, (s) => s.where("status", "=", e.status)); } function ae(r, e) { return r.selectFrom("relations").selectAll().where("source_id", "=", e); } function oe(r, e) { return r.selectFrom("examples").selectAll().where("synset_id", "=", e); } function le(r, e) { return r.selectFrom("forms").selectAll().where("word_id", "=", e); } function ce(r) { return { totalWords: r.selectFrom("words").select(r.fn.countAll().as("count")), totalSynsets: r.selectFrom("synsets").select(r.fn.countAll().as("count")), totalSenses: r.selectFrom("senses").select(r.fn.countAll().as("count")), totalILIs: r.selectFrom("synsets").where("ili", "is not", null).select(r.fn.countAll().as("count")), totalLexicons: r.selectFrom("lexicons").select(r.fn.countAll().as("count")) }; } function R(r, e) { return r.selectFrom("definitions").selectAll().where("synset_id", "in", e); } function A(r, e) { return r.selectFrom("examples").selectAll().where("synset_id", "in", e); } function L(r, e) { return r.selectFrom("relations").selectAll().where("source_id", "in", e); } function q(r, e) { return r.selectFrom("senses").selectAll().where("synset_id", "in", e); } function D(r, e) { return r.selectFrom("senses").select("word_id").where("synset_id", "=", e); } function B(r, e) { return r.selectFrom("senses").selectAll().where("synset_id", "=", e); } class de { constructor(e, s) { this.db = e, s?.strategy && (this.defaultStrategy = s.strategy, this.updateDefaultOptions(s.strategy)); } defaultStrategy = "default"; defaultOptions = { strategy: "default", includeDefinitions: !0, includeExamples: !0, includeRelations: !0, includeSenses: !0 }; // Public getter for database access get database() { return this.db; } updateDefaultOptions(e) { this.defaultOptions = this.getOptionsForStrategy(e); } getOptionsForStrategy(e) { switch (e) { case "fast": return { strategy: "fast", includeDefinitions: !1, includeExamples: !1, includeRelations: !1, includeSenses: !1 }; case "balanced": return { strategy: "balanced", includeDefinitions: !0, includeExamples: !1, includeRelations: !1, includeSenses: !1 }; case "complete": return { strategy: "complete", includeDefinitions: !0, includeExamples: !0, includeRelations: !0, includeSenses: !0 }; case "default": default: return { strategy: "default", includeDefinitions: !0, includeExamples: !0, includeRelations: !0, includeSenses: !0 }; } } // Lexicon queries async getLexicons(e = {}) { return (await te(this.db, e).execute()).map(this.transformLexiconRecord.bind(this)); } async getLexiconById(e) { const s = await ne(this.db, e).executeTakeFirst(); return s ? this.transformLexiconRecord(s) : void 0; } // Word queries async getWords(e = {}) { const n = await E(this.db, e).execute(); return await Promise.all(n.map(this.transformWordRecord.bind(this))); } async getWordById(e) { const s = await N(this.db, e).executeTakeFirst(); return s ? await this.transformWordRecord(s) : void 0; } // Optimized word query methods async getWordsByFormFast(e, s = {}) { const t = await z(this.db, e, s).execute(); return await Promise.all(t.map(this.transformWordRecord.bind(this))); } async getWordsByFormFuzzyFast(e, s = {}) { const t = await J(this.db, e, s).execute(); return await Promise.all(t.map(this.transformWordRecord.bind(this))); } // Synset queries - Strategy-specific methods for better type safety async getSynsets(e = {}) { return this.getSynsetsV5(e); } // V1 Strategy - DEPRECATED: Use V5 or V6 for better performance // Performance: ~0.4 Hz (very slow) // This strategy is kept for backward compatibility only async getSynsetsV1(e = {}) { return this.getSynsetsV4(e); } // V2 Strategy - DEPRECATED: Use V5 or V6 for better performance // Performance: ~0.42 Hz (very slow) // This strategy is kept for backward compatibility only async getSynsetsV2(e = {}) { const n = await V(this.db, e).execute(), t = []; for (const i of n) t.push(await this.transformSynsetRecordV2(i)); return t; } // V3 Strategy - DEPRECATED: Use V5 or V6 for better performance // Performance: ~0.47 Hz (very slow) // This strategy is kept for backward compatibility only async getSynsetsV3(e = {}) { const n = await U(this.db, e).execute(); if (n.length === 0) return []; const t = n.map((d) => d.id), i = await R(this.db, t).execute(), o = await A(this.db, t).execute(), l = await L(this.db, t).execute(), c = await q(this.db, t).execute(), f = /* @__PURE__ */ new Map(), a = /* @__PURE__ */ new Map(), u = /* @__PURE__ */ new Map(), w = /* @__PURE__ */ new Map(); i.forEach((d) => { d.synset_id && !f.has(d.synset_id) && f.set(d.synset_id, []), d.synset_id && f.get(d.synset_id).push(d); }), o.forEach((d) => { d.synset_id && !a.has(d.synset_id) && a.set(d.synset_id, []), d.synset_id && a.get(d.synset_id).push(d); }), l.forEach((d) => { d.source_id && !u.has(d.source_id) && u.set(d.source_id, []), d.source_id && u.get(d.source_id).push(d); }), c.forEach((d) => { d.synset_id && !w.has(d.synset_id) && w.set(d.synset_id, []), d.synset_id && w.get(d.synset_id).push(d); }); const g = []; for (const d of n) { const y = f.get(d.id) || [], b = a.get(d.id) || [], I = u.get(d.id) || [], p = w.get(d.id) || [], m = { id: d.id, pos: d.pos, language: d.language || "", lexicon: d.lexicon, definitions: y.map((h) => ({ id: h.id, language: h.language, text: h.text, source: h.source || void 0 })), examples: b.map((h) => ({ id: h.id, language: h.language || "", text: h.text, source: h.source || "" })), relations: I.map((h) => ({ id: h.id, type: h.type, target: h.target_id, source: h.source || "" })), memberIds: p.map((h) => h.word_id), senseIds: p.map((h) => h.id) }; d.ili !== void 0 && d.ili !== null && (m.ili = d.ili), g.push(m); } return g; } // V4 Strategy - DEPRECATED: Use V5 or V6 for better performance // Performance: ~0.40 Hz (very slow) // This strategy is kept for backward compatibility only async getSynsetsV4(e = {}) { const n = await G(this.db, e).execute(); if (n.length === 0) return []; const t = /* @__PURE__ */ new Map(); for (const i of n) { const o = i.synset_id; t.has(o) || t.set(o, { id: o, pos: i.synset_pos, language: i.synset_language || "", lexicon: i.synset_lexicon, definitions: [], examples: [], relations: [], memberIds: [], senseIds: [], ili: i.synset_ili || void 0 }); const l = t.get(o); i.def_id && !l.definitions.some((c) => c.id === i.def_id) && l.definitions.push({ id: i.def_id, language: i.def_language || "", text: i.def_text || "", source: i.def_source || "" }), i.ex_id && !l.examples.some((c) => c.id === i.ex_id) && l.examples.push({ id: i.ex_id, language: i.ex_language || "", text: i.ex_text || "", source: i.ex_source || "" }), i.rel_id && !l.relations.some((c) => c.id === i.rel_id) && l.relations.push({ id: i.rel_id, type: i.rel_type || "", target: i.rel_target || "", source: i.rel_source || "" }), i.sense_id && !l.senseIds.includes(i.sense_id) && (l.senseIds.push(i.sense_id), i.sense_word_id && !l.memberIds.includes(i.sense_word_id) && l.memberIds.push(i.sense_word_id)); } return Array.from(t.values()); } // V5 Strategy - Ultra-optimized with indexes and caching queryCache = /* @__PURE__ */ new Map(); cacheHits = 0; cacheMisses = 0; async getSynsetsV5(e = {}) { const s = `synsets:${JSON.stringify(e)}`; if (this.queryCache.has(s)) return this.cacheHits++, this.queryCache.get(s); this.cacheMisses++; const t = await j(this.db, e).execute(); if (t.length === 0) return this.queryCache.set(s, []), []; const i = /* @__PURE__ */ new Map(), o = /* @__PURE__ */ new Set(), l = /* @__PURE__ */ new Set(), c = /* @__PURE__ */ new Set(), f = /* @__PURE__ */ new Set(); for (const u of t) { const w = u.synset_id; i.has(w) || i.set(w, { id: w, pos: u.synset_pos, language: u.synset_language || "", lexicon: u.synset_lexicon, definitions: [], examples: [], relations: [], memberIds: [], senseIds: [], ili: u.synset_ili || void 0 }); const g = i.get(w); u.def_id && !o.has(u.def_id) && (o.add(u.def_id), g.definitions.push({ id: u.def_id, language: u.def_language || "", text: u.def_text || "", source: u.def_source || "" })), u.ex_id && !l.has(u.ex_id) && (l.add(u.ex_id), g.examples.push({ id: u.ex_id, language: u.ex_language || "", text: u.ex_text || "", source: u.ex_source || "" })), u.rel_id && !c.has(u.rel_id) && (c.add(u.rel_id), g.relations.push({ id: u.rel_id, type: u.rel_type || "", target: u.rel_target || "", source: u.rel_source || "" })), u.sense_id && !f.has(u.sense_id) && (f.add(u.sense_id), g.senseIds.push(u.sense_id), u.sense_word_id && !g.memberIds.includes(u.sense_word_id) && g.memberIds.push(u.sense_word_id)); } const a = Array.from(i.values()); if (this.queryCache.set(s, a), this.queryCache.size > 1e3) { const u = this.queryCache.keys().next().value; u && this.queryCache.delete(u); } return a; } // V6 Strategy - Memory-optimized with pre-computed indexes async getSynsetsV6(e = {}) { const n = await H(this.db, e).execute(); if (n.length === 0) return []; const t = n.map((d) => d.id), i = await R(this.db, t).execute(), o = await A(this.db, t).execute(), l = await L(this.db, t).execute(), c = await q(this.db, t).execute(), f = /* @__PURE__ */ new Map(), a = /* @__PURE__ */ new Map(), u = /* @__PURE__ */ new Map(), w = /* @__PURE__ */ new Map(); for (let d = 0; d < i.length; d++) { const y = i[d]; y && y.synset_id && !f.has(y.synset_id) && f.set(y.synset_id, []), y && y.synset_id && f.get(y.synset_id).push(y); } for (let d = 0; d < o.length; d++) { const y = o[d]; y && y.synset_id && !a.has(y.synset_id) && a.set(y.synset_id, []), y && y.synset_id && a.get(y.synset_id).push(y); } for (let d = 0; d < l.length; d++) { const y = l[d]; y && y.source_id && !u.has(y.source_id) && u.set(y.source_id, []), y && y.source_id && u.get(y.source_id).push(y); } for (let d = 0; d < c.length; d++) { const y = c[d]; y && y.synset_id && !w.has(y.synset_id) && w.set(y.synset_id, []), y && y.synset_id && w.get(y.synset_id).push(y); } const g = []; for (const d of n) { const y = f.get(d.id) || [], b = a.get(d.id) || [], I = u.get(d.id) || [], p = w.get(d.id) || []; g.push({ id: d.id, pos: d.pos, language: d.language || "", lexicon: d.lexicon, definitions: y.map((m) => ({ id: m.id, language: m.language, text: m.text, source: m.source || void 0 })), examples: b.map((m) => ({ id: m.id, language: m.language || "", text: m.text, source: m.source || "" })), relations: I.map((m) => ({ id: m.id, type: m.type, target: m.target_id, source: m.source || "" })), memberIds: p.map((m) => m?.word_id).filter(Boolean), senseIds: p.map((m) => m?.id).filter(Boolean), ili: d.ili || void 0 }); } return g; } // Fast Strategy - Minimal data loading (no related data) async getSynsetsFast(e = {}) { const n = await Y(this.db, e).execute(), t = []; for (const i of n) t.push(await this.transformSynsetRecordFast(i)); return t; } async getSynsetById(e, s = {}) { const n = await Z(this.db, e).executeTakeFirst(); return n ? await this.transformSynsetRecord(n, s) : void 0; } // Optimized synset query methods async getSynsetsByFormFast(e, s = {}) { const t = await X(this.db, e, s).execute(), i = []; for (const o of t) i.push(await this.transformSynsetRecord(o, s)); return i; } // Sense queries async getSenses(e = {}) { return this.getSensesV5(e); } // V1 Strategy - DEPRECATED: Use V5 or V6 for better performance // Performance: ~4-39 Hz (slow) // This strategy is kept for backward compatibility only async getSensesV1(e = {}) { const n = await F(this.db, e).execute(); return await Promise.all(n.map(this.transformSenseRecord.bind(this))); } // V5 Strategy - Ultra-fast with caching and optimized queries // Performance: ~50,000+ Hz (ultra-fast) // Best for: Production applications with repeated queries async getSensesV5(e = {}) { const s = `senses:${JSON.stringify(e)}`; if (this.queryCache.has(s)) return this.cacheHits++, this.queryCache.get(s); this.cacheMisses++; const t = await F(this.db, e).execute(), i = []; for (const o of t) i.push(this.transformSenseRecord(o)); if (this.queryCache.set(s, i), this.queryCache.size > 1e3) { const o = this.queryCache.keys().next().value; o && this.queryCache.delete(o); } return i; } // V6 Strategy - Memory-optimized with batch loading // Performance: ~1,000+ Hz (very fast) // Best for: Consistent performance without caching complexity async getSensesV6(e = {}) { const n = await F(this.db, e).execute(), t = new Array(n.length); for (let i = 0; i < n.length; i++) t[i] = this.transformSenseRecord(n[i]); return t; } async getSenseById(e) { const s = await O(this.db, e).executeTakeFirst(); return s ? this.transformSenseRecord(s) : void 0; } // Definition queries async getDefinitionsBySynsetId(e) { return se(this.db, e).execute(); } // ILI queries async getIliById(e) { const s = await ie(this.db, e).executeTakeFirst(); return s ? this.transformIliRecord(s) : void 0; } async getIlis(e = {}) { return (await re(this.db, e).execute()).map(this.transformIliRecord.bind(this)); } // Statistics queries async getStatistics() { try { const e = ce(this.db), s = await Promise.all([ e.totalWords.execute(), e.totalSynsets.execute(), e.totalSenses.execute(), e.totalILIs.execute(), e.totalLexicons.execute() ]), n = (i) => { const o = i?.[0]?.count; return Number(typeof o == "bigint" ? o : o ?? 0); }; return { totalWords: n(s[0]), totalSynsets: n(s[1]), totalSenses: n(s[2]), totalILIs: n(s[3]), totalLexicons: n(s[4]) }; } catch { return { totalWords: 0, totalSynsets: 0, totalSenses: 0, totalILIs: 0, totalLexicons: 0 }; } } // Batch insert operations async batchInsert(e, s) { return _(this.db, e, s); } // Transformation methods to convert database records to wn-ts-core types transformLexiconRecord(e) { const s = { id: e.id, label: e.label, language: e.language }; return e.email !== void 0 && (s.email = e.email), e.license !== void 0 && (s.license = e.license), e.version !== void 0 && (s.version = e.version), e.url !== void 0 && (s.url = e.url), e.citation !== void 0 && (s.citation = e.citation), e.logo !== void 0 && (s.logo = e.logo), e.metadata !== void 0 && (s.metadata = JSON.parse(e.metadata)), s; } async transformWordRecord(e) { return { id: e.id, lemma: e.lemma, pos: e.pos, forms: [], // Will be populated separately if needed pronunciations: [], tags: [], counts: [], language: e.language || "", lexicon: e.lexicon }; } // Strategy-specific transformation methods for type safety async transformSynsetRecordV1(e) { const s = await this.getDefinitionsBySynsetId(e.id), n = await this.getExamplesBySynsetId(e.id), t = await this.getRelationsBySynsetId(e.id), i = await D(this.db, e.id).execute(), o = await B(this.db, e.id).execute(), l = { id: e.id, pos: e.pos, language: e.language, lexicon: e.lexicon, definitions: s.map((c) => ({ id: c.id, language: c.language, text: c.text, source: c.source || "" })), examples: n.map((c) => ({ id: c.id, language: c.language, text: c.text, source: c.source || "" })), relations: t.map((c) => ({ id: c.id, type: c.type, target: c.target_id, source: c.source || "" })), memberIds: i.map((c) => c.word_id), senseIds: o.map((c) => c.id) }; return e.ili !== void 0 && (l.ili = e.ili), l; } async transformSynsetRecordV2(e) { const s = await this.getDefinitionsBySynsetId(e.id), n = await this.getExamplesBySynsetId(e.id), t = await this.getRelationsBySynsetId(e.id), i = await D(this.db, e.id).execute(), o = await B(this.db, e.id).execute(), l = { id: e.id, pos: e.pos, language: e.language || void 0, lexicon: e.lexicon, definitions: s.map((c) => ({ id: c.id, language: c.language, text: c.text, source: c.source || "" })), examples: n.map((c) => ({ id: c.id, language: c.language, text: c.text, source: c.source || "" })), relations: t.map((c) => ({ id: c.id, type: c.type, target: c.target_id, source: c.source || "" })), memberIds: i.map((c) => c.word_id), senseIds: o.map((c) => c.id) }; return e.ili !== void 0 && (l.ili = e.ili), l; } async transformSynsetRecordFast(e) { const s = { id: e.id, pos: e.pos, language: e.language || void 0, lexicon: e.lexicon, definitions: [], examples: [], relations: [], memberIds: [], senseIds: [] }; return e.ili !== void 0 && (s.ili = e.ili), s; } // Legacy method for backward compatibility async transformSynsetRecord(e, s = {}) { const n = s.strategy || this.defaultStrategy, t = this.getOptionsForStrategy(n); s.includeDefinitions !== void 0 && (t.includeDefinitions = s.includeDefinitions), s.includeExamples !== void 0 && (t.includeExamples = s.includeExamples), s.includeRelations !== void 0 && (t.includeRelations = s.includeRelations), s.includeSenses !== void 0 && (t.includeSenses = s.includeSenses); const i = { id: e.id, pos: e.pos, language: e.language, lexicon: e.lexicon, definitions: [], examples: [], relations: [], memberIds: [], senseIds: [] }; if (e.ili !== void 0 && (i.ili = e.ili), t.includeDefinitions) { const o = await this.getDefinitionsBySynsetId(e.id); i.definitions = o.map((l) => ({ id: l.id, language: l.language, text: l.text, source: l.source || "" })); } if (t.includeExamples) { const o = await this.getExamplesBySynsetId(e.id); i.examples = o.map((l) => ({ id: l.id, language: l.language, text: l.text, source: l.source || "" })); } if (t.includeRelations) { const o = await this.getRelationsBySynsetId(e.id); i.relations = o.map((l) => ({ id: l.id, type: l.type, target: l.target_id, source: l.source || "" })); } if (t.includeSenses) { const o = await D(this.db, e.id).execute(), l = await B(this.db, e.id).execute(); i.memberIds = o.map((c) => c.word_id), i.senseIds = l.map((c) => c.id); } return i; } transformSenseRecord(e) { const s = { id: e.id, wordId: e.word_id, synsetId: e.synset_id, examples: [], // Missing property counts: [], // Missing property tags: [] // Missing property }; return e.source !== void 0 && (s.source = e.source), e.sensekey !== void 0 && (s.sensekey = e.sensekey), e.adjposition !== void 0 && (s.adjposition = e.adjposition), e.subcategory !== void 0 && (s.subcategory = e.subcategory), e.domain !== void 0 && (s.domain = e.domain), e.register !== void 0 && (s.register = e.register), s; } transformIliRecord(e) { const s = { id: e.id, status: e.status }; return e.definition !== void 0 && (s.definition = e.definition), e.superseded_by !== void 0 && (s.supersededBy = e.superseded_by), e.note !== void 0 && (s.note = e.note), s; } // Additional methods for export functionality async getWordsByLexicon(e) { return Q(this.db, e).execute(); } async getSensesByWordId(e) { return P(this.db, e).execute(); } async getSynsetsByLexicon(e) { return ee(this.db, e).execute(); } async getExamplesBySynsetId(e) { return oe(this.db, e).execute(); } async getSensesBySynsetId(e) { return K(this.db, e).execute(); } async getWordsByIds(e) { if (!e || e.length === 0) return []; const s = await T(this.db, e).execute(); return await Promise.all(s.map(this.transformWordRecord.bind(this))); } async getWordsBySynsetAndLanguage(e, s) { const t = await $(this.db, e, s).execute(), i = /* @__PURE__ */ new Set(), o = []; for (const l of t) i.has(l.id) || (i.add(l.id), o.push(await this.transformWordRecord(l))); return o; } async getWordsByIliAndLanguage(e, s) { const t = await k(this.db, e, s).execute(), i = /* @__PURE__ */ new Set(), o = []; for (const l of t) i.has(l.id) || (i.add(l.id), o.push(await this.transformWordRecord(l))); return o; } async getWordsByIliAndLexiconPrefix(e, s) { const t = await M(this.db, e, s).execute(), i = /* @__PURE__ */ new Set(), o = []; for (const l of t) i.has(l.id) || (i.add(l.id), o.push(await this.transformWordRecord(l))); return o; } async getRelationsBySynsetId(e) { return ae(this.db, e).execute(); } /** * Get forms for a specific word */ async getFormsByWordId(e) { return le(this.db, e).execute(); } } async function ue(r) { const e = r.schema; await fe(e), await we(e), await me(e), await he(e), await ge(e), await xe(e), await _e(e), await pe(e), await Se(e); } async function ye(r) { const e = r.schema; await e.createIndex("idx_words_lemma").ifNotExists().on("words").column("lemma").execute(), await e.createIndex("idx_words_language").ifNotExists().on("words").column("language").execute(), await e.createIndex("idx_words_lexicon").ifNotExists().on("words").column("lexicon").execute(), await e.createIndex("idx_words_pos").ifNotExists().on("words").column("pos").execute(), await e.createIndex("idx_synsets_language").ifNotExists().on("synsets").column("language").execute(), await e.createIndex("idx_synsets_lexicon").ifNotExists().on("synsets").column("lexicon").execute(), await e.createIndex("idx_synsets_ili").ifNotExists().on("synsets").column("ili").execute(), await e.createIndex("idx_senses_word_id").ifNotExists().on("senses").column("word_id").execute(), await e.createIndex("idx_senses_synset_id").ifNotExists().on("senses").column("synset_id").execute(), await e.createIndex("idx_forms_word_id").ifNotExists().on("forms").column("word_id").execute(), await e.createIndex("idx_forms_written_form").ifNotExists().on("forms").column("written_form").execute(), await e.createIndex("idx_definitions_synset_id").ifNotExists().on("definitions").column("synset_id").execute(), await e.createIndex("idx_examples_synset_id").ifNotExists().on("examples").column("synset_id").execute(), await e.createIndex("idx_examples_sense_id").ifNotExists().on("examples").column("sense_id").execute(), await e.createIndex("idx_relations_source_id").ifNotExists().on("relations").column("source_id").execute(), await e.createIndex("idx_relations_target_id").ifNotExists().on("relations").column("target_id").execute(), await e.createIndex("idx_relations_type").ifNotExists().on("relations").column("type").execute(); } async function fe(r) { await r.createTable("lexicons").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("label", "text", (e) => e.notNull()).addColumn("language", "text", (e) => e.notNull()).addColumn("email", "text").addColumn("license", "text").addColumn("version", "text").addColumn("url", "text").addColumn("citation", "text").addColumn("logo", "text").addColumn("metadata", "text").execute(); } async function we(r) { await r.createTable("words").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("lemma", "text", (e) => e.notNull()).addColumn("pos", "text", (e) => e.notNull()).addColumn("language", "text", (e) => e.notNull()).addColumn("lexicon", "text", (e) => e.notNull().references("lexicons.id").onDelete("cascade")).execute(); } async function me(r) { await r.createTable("forms").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("word_id", "text", (e) => e.notNull().references("words.id").onDelete("cascade")).addColumn("written_form", "text", (e) => e.notNull()).addColumn("script", "text").addColumn("tag", "text").execute(); } async function he(r) { await r.createTable("synsets").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("ili", "text").addColumn("pos", "text", (e) => e.notNull()).addColumn("language", "text", (e) => e.notNull()).addColumn("lexicon", "text", (e) => e.notNull().references("lexicons.id").onDelete("cascade")).execute(); } async function ge(r) { await r.createTable("senses").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("word_id", "text", (e) => e.notNull().references("words.id").onDelete("cascade")).addColumn("synset_id", "text", (e) => e.notNull().references("synsets.id").onDelete("cascade")).addColumn("source", "text").addColumn("sensekey", "text").addColumn("adjposition", "text").addColumn("subcategory", "text").addColumn("domain", "text").addColumn("register", "text").execute(); } async function xe(r) { await r.createTable("definitions").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("synset_id", "text", (e) => e.notNull().references("synsets.id").onDelete("cascade")).addColumn("language", "text", (e) => e.notNull()).addColumn("text", "text", (e) => e.notNull()).addColumn("source", "text").execute(); } async function _e(r) { await r.createTable("relations").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("source_id", "text", (e) => e.notNull()).addColumn("target_id", "text", (e) => e.notNull()).addColumn("type", "text", (e) => e.notNull()).addColumn("source", "text").execute(); } async function pe(r) { await r.createTable("examples").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("synset_id", "text", (e) => e.references("synsets.id").onDelete("cascade")).addColumn("sense_id", "text", (e) => e.references("senses.id").onDelete("cascade")).addColumn("language", "text", (e) => e.notNull()).addColumn("text", "text", (e) => e.notNull()).addColumn("source", "text").execute(); } async function Se(r) { await r.createTable("ilis").ifNotExists().addColumn("id", "text", (e) => e.primaryKey()).addColumn("definition", "text").addColumn("status", "text", (e) => e.notNull()).addColumn("superseded_by", "text").addColumn("note", "text").addColumn("meta", "text").execute(); } class S { /** * Create all tables in the correct order to respect foreign key constraints */ static async createTables(e) { return ue(e); } /** * Create all indexes for optimal performance */ static async createIndexes(e) { return ye(e); } } async function be(r, e) { await r.deleteFrom("forms").where( "word_id", "in", r.selectFrom("words").select("id").where("lexicon", "=", e) ).execute(), await r.deleteFrom("definitions").where( "synset_id", "in", r.selectFrom("synsets").select("id").where("lexicon", "=", e) ).execute(), await r.deleteFrom("relations").where( (s) => s.or([ s("source_id", "in", r.selectFrom("synsets").select("id").where("lexicon", "=", e)), s("target_id", "in", r.selectFrom("synsets").select("id").where("lexicon", "=", e)) ]) ).execute(), await r.deleteFrom("examples").where( "synset_id", "in", r.selectFrom("synsets").select("id").where("lexicon", "=", e) ).execute(), await r.deleteFrom("senses").where( "word_id", "in", r.selectFrom("words").select("id").where("lexicon", "=", e) ).execute(), await r.deleteFrom("words").where("lexicon", "=", e).execute(), await r.deleteFrom("synsets").where("lexicon", "=", e).execute(), await r.deleteFrom("lexicons").where("id", "=", e).execute(); } async function Ie(r, e) { const n = (await r.selectFrom("words").select("id").where("lexicon", "=", e).execute()).map((t) => t.id); n.length > 0 && (await r.deleteFrom("forms").where("word_id", "in", n).execute(), await r.deleteFrom("senses").where("word_id", "in", n).execute(), await r.deleteFrom("words").where("id", "in", n).execute()); } async function Fe(r, e) { const n = (await r.selectFrom("synsets").select("id").where("lexicon", "=", e).execute()).map((t) => t.id); n.length > 0 && (await r.deleteFrom("definitions").where("synset_id", "in", n).execute(), await r.deleteFrom("relations").where( (t) => t.or([ t("source_id", "in", n), t("target_id", "in", n) ]) ).execute(), await r.deleteFrom("examples").where("synset_id", "in", n).execute(), await r.deleteFrom("senses").where("synset_id", "in", n).execute(), await r.deleteFrom("synsets").where("id", "in", n).execute()); } async function De(r) { const e = [ "forms", "definitions", "relations", "examples", "senses", "words", "synsets", "ilis", "lexicons" ]; for (const s of e) try { await r.deleteFrom(s).execute(); } catch { } } class C { /** * Delete all data from a lexicon in the correct order to respect foreign key constraints */ static async deleteLexicon(e, s) { return be(e, s); } /** * Delete all words from a lexicon */ static async deleteWordsByLexicon(e, s) { return Ie(e, s); } /** * Delete all synsets from a lexicon */ static async deleteSynsetsByLexicon(e, s) { return Fe(e, s); } /** * Clear all data from the database in the correct order */ static async clearAllData(e) { return De(e); } /** * Get comprehensive lexicon statistics */ static async getLexiconStatistics(e, s) { return (await e.selectFrom("lexicons").select((i) => [ "lexicons.id as lexiconId", "lexicons.label", "lexicons.language", "lexicons.version", i.selectFrom("words").select(i.fn.countAll().as("wc")).whereRef("words.lexicon", "=", "lexicons.id").as("word_count"), i.selectFrom("synsets").select(i.fn.countAll().as("sc")).whereRef("synsets.lexicon", "=", "lexicons.id").as("synset_count"), i.selectFrom("senses").innerJoin("words", "senses.word_id", "words.id").select(i.fn.countAll().as("sc")).whereRef("words.lexicon", "=", "lexicons.id").as("sense_count"), i.selectFrom("synsets").select(i.fn.countAll().as("ic")).whereRef("synsets.lexicon", "=", "lexicons.id").where("synsets.ili", "is not", null).as("ili_count") ]).$if(!!s, (i) => i.where("lexicons.id", "=", s)).execute()).map((i) => ({ lexiconId: i.lexiconId, label: i.label || "", language: i.language || "", version: i.version ?? "", wordCount: Number(i.word_count ?? 0), synsetCount: Number(i.synset_count ?? 0), senseCount: Number(i.sense_count ?? 0), iliCount: Number(i.ili_count ?? 0) })); } /** * Get data quality metrics */ static async getDataQualityMetrics(e) { const s = await e.selectFrom("synsets").select(e.fn.countAll().as("count")).executeTakeFirst(), n = await e.selectFrom("synsets").where("ili", "is not", null).select(e.fn.countAll().as("count")).executeTakeFirst(), t = await e.selectFrom("senses").select(e.fn.count("synset_id").distinct().as("count")).executeTakeFirst(), i = await e.selectFrom("definitions").select(e.fn.count("synset_id").distinct().as("count")).executeTakeFirst(), o = await e.selectFrom("examples").select(e.fn.count("synset_id").distinct().as("count")).executeTakeFirst(), l = Number(s?.count ?? 0), c = Number(n?.count ?? 0), f = Number(i?.count ?? 0), a = Number(t?.count ?? 0), u = Number(o?.count ?? 0); let w = 0; return a > 0 && (w = a / l), { synsetsWithILI: c, synsetsWithoutILI: l - c, iliCoveragePercentage: l > 0 ? c / l * 100 : 0, emptySynsets: l - a, synsetsWithDefinitions: f, synsetsWithExamples: u, averageSynsetSize: w }; } /** * Get part of speech distribution */ static async getPartOfSpeechDistribution(e) { const s = await e.selectFrom("words").select(["pos", (t) => t.fn.countAll().as("count")]).groupBy("pos").execute(), n = {}; return s.forEach((t) => { n[t.pos] = Number(t.count); }), n; } /** * Get synset size analysis */ static async getSynsetSizeAnalysis(e) { const s = await e.selectFrom("synsets").leftJoin("senses", "synsets.id", "senses.synset_id").select(["synsets.id", (c) => c.fn.count("senses.id").as("size")]).groupBy("synsets.id").execute(); if (s.length === 0) return { averageSize: 0, maxSize: 0, minSize: 0, sizeDistribution: {} }; const n = s.map((c) => Number(c.size)), t = n.reduce((c, f) => c + f, 0) / n.length, i = Math.max(...n), o = Math.min(...n), l = {}; return n.forEach((c) => { l[c] = (l[c] || 0) + 1; }), { averageSize: t, maxSize: i, minSize: o, sizeDistribution: l }; } } class Be { db; sqliteDb; // better-sqlite3 Database instance config; constructor(e) { this.config = e; } async initialize() { const e = (await import("better-sqlite3")).default; if (this.config.forceRecreate) try { const n = await import("fs"); n.existsSync(this.config.filename) && n.unlinkSync(this.config.filename); } catch { } this.sqliteDb = new e(this.config.filename, { readonly: this.config.readonly || !1, fileMustExist: this.config.fileMustExist || !1, timeout: this.config.timeout || 5e3, verbose: this.config.verbose ? console.log : void 0 }); const s = new v({ database: this.sqliteDb }); this.db = new W({ dialect: s }), await S.createTables(this.db), await S.createIndexes(this.db); } getDatabase() { if (!this.db) throw new Error("Database not initialized. Call initialize() first."); return this.db; } async close() { this.sqliteDb && (this.sqliteDb.close(), this.sqliteDb = void 0), this.db = void 0; } /** * Get the underlying better-sqlite3 database instance * This is useful for operations that need direct SQLite access */ getSqliteDatabase() { return this.sqliteDb; } /** * Execute a raw SQL query (useful for complex operations) */ async executeRaw(e, s = []) { if (!this.sqliteDb) throw new Error("Database not initialized"); return this.sqliteDb.prepare(e).all(s); } /** * Execute a raw SQL query that returns a single row */ async executeRawSingle(e, s = []) { if (!this.sqliteDb) throw new Error("Database not initialized"); return this.sqliteDb.prepare(e).get(s); } /** * Begin a transaction */ async beginTransaction() { if (!this.sqliteDb) throw new Error("Database not initialized"); this.sqliteDb.exec("BEGIN TRANSACTION"); } /** * Commit a transaction */ async commitTransaction() { if (!this.sqliteDb) throw new Error("Database not initialized"); this.sqliteDb.exec("COMMIT"); } /** * Rollback a transaction */ async rollbackTransaction(