UNPKG

wn-ts-node

Version:
181 lines (180 loc) 11.4 kB
/**
 * Database field -> XML attribute pairs shared by Lexicon, LexicalEntry,
 * Sense, SyntacticBehaviour and Synset elements, in emission order.
 */
const META_ATTRIBUTES = Object.freeze([
  ["dc_contributor", "dc:contributor"],
  ["dc_coverage", "dc:coverage"],
  ["dc_creator", "dc:creator"],
  ["dc_date", "dc:date"],
  ["dc_description", "dc:description"],
  ["dc_format", "dc:format"],
  ["dc_identifier", "dc:identifier"],
  ["dc_publisher", "dc:publisher"],
  ["dc_relation", "dc:relation"],
  ["dc_rights", "dc:rights"],
  ["dc_source", "dc:source"],
  ["dc_subject", "dc:subject"],
  ["dc_title", "dc:title"],
  ["dc_type", "dc:type"],
  ["status", "status"],
  ["note", "note"],
  ["confidence_score", "confidenceScore"],
]);

/** Escapes the characters that are not allowed verbatim in XML character data. */
function escapeText(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/** Escapes a value for use inside a double-quoted XML attribute. */
function escapeAttr(value) {
  return escapeText(value).replace(/"/g, "&quot;");
}

/** Renders ` name="value"` (leading space included) with the value escaped. */
function attr(name, value) {
  return ` ${name}="${escapeAttr(value)}"`;
}

/** Renders the attribute only when the value is truthy; falsy values (including 0 and "") are omitted. */
function optionalAttr(name, value) {
  return value ? attr(name, value) : "";
}

/** Renders every truthy metadata attribute of a record, in META_ATTRIBUTES order. */
function metadataAttrs(record) {
  return META_ATTRIBUTES.map(([field, name]) => optionalAttr(name, record[field])).join("");
}

/** Renders a list of `{ category, text }` rows as <Tag> elements. */
function renderTags(tags) {
  return tags
    .map((tag) => ` <Tag${attr("category", tag.category)}>${escapeText(tag.text)}</Tag> `)
    .join("");
}

/** Renders a list of `{ language?, text }` rows as <Example> elements. */
function renderExamples(examples) {
  return examples
    .map(
      (example) =>
        ` <Example${optionalAttr("language", example.language)}>${escapeText(example.text)}</Example> `,
    )
    .join("");
}

/** Renders one <Sense> element together with its relations, examples and counts. */
async function renderSense(o, sense) {
  const parts = [
    ` <Sense${attr("id", sense.id)}${attr("synset", sense.synset_id)}`,
    // `== null` skips both undefined and null so a null column never becomes lexicalized="null".
    sense.lexicalized == null ? "" : attr("lexicalized", sense.lexicalized),
    optionalAttr("adjposition", sense.adjposition),
    metadataAttrs(sense),
    "> ",
  ];

  for (const relation of await o.getSenseRelations(sense.id)) {
    parts.push(
      ` <SenseRelation${attr("relType", relation.rel_type)}${attr("target", relation.target)}`,
      optionalAttr("dc:type", relation.dc_type),
      " /> ",
    );
  }

  parts.push(renderExamples(await o.getSenseExamples(sense.id)));

  for (const count of await o.getSenseCounts(sense.id)) {
    parts.push(
      ` <Count${optionalAttr("dc:source", count.dc_source)}>${escapeText(count.value)}</Count> `,
    );
  }

  parts.push(" </Sense> ");
  return parts.join("");
}

/** Renders one <LexicalEntry> element: lemma, forms, senses and syntactic behaviours. */
async function renderEntry(o, entry) {
  const parts = [
    ` <LexicalEntry${attr("id", entry.id)}${metadataAttrs(entry)}> `,
    ` <Lemma${attr("partOfSpeech", entry.pos)}${attr("writtenForm", entry.lemma)}`,
    optionalAttr("script", entry.script),
    "> ",
  ];
  parts.push(renderTags(await o.getWordTags(entry.id)), " </Lemma> ");

  for (const form of await o.getForms(entry.id)) {
    parts.push(
      ` <Form${attr("writtenForm", form.written_form)}${optionalAttr("script", form.script)}> `,
    );
    parts.push(renderTags(await o.getFormTags(form.id)), " </Form> ");
  }

  for (const sense of await o.getSenses(entry.id)) {
    parts.push(await renderSense(o, sense));
  }

  for (const behaviour of await o.getSyntacticBehaviours(entry.id)) {
    parts.push(
      ` <SyntacticBehaviour${attr("senses", behaviour.senses)}`,
      attr("subcategorizationFrame", behaviour.subcategorization_frame),
      metadataAttrs(behaviour),
      " /> ",
    );
  }

  parts.push(" </LexicalEntry> ");
  return parts.join("");
}

/** Renders one <Synset> element: definitions, ILI definitions, relations and examples. */
async function renderSynset(o, synset) {
  const parts = [
    ` <Synset${attr("id", synset.id)}${attr("ili", synset.ili || "")}${attr("partOfSpeech", synset.pos)}`,
    // Same null/undefined guard as for senses.
    synset.lexicalized == null ? "" : attr("lexicalized", synset.lexicalized),
    metadataAttrs(synset),
    "> ",
  ];

  for (const definition of await o.getDefinitions(synset.id)) {
    parts.push(
      ` <Definition${optionalAttr("language", definition.language)}`,
      optionalAttr("sourceSense", definition.source_sense),
      `>${escapeText(definition.text)}</Definition> `,
    );
  }

  for (const iliDefinition of await o.getILIDefinitions(synset.id)) {
    parts.push(` <ILIDefinition>${escapeText(iliDefinition.text)}</ILIDefinition> `);
  }

  for (const relation of await o.getSynsetRelations(synset.id)) {
    parts.push(
      ` <SynsetRelation${attr("relType", relation.rel_type)}${attr("target", relation.target)} /> `,
    );
  }

  parts.push(renderExamples(await o.getSynsetExamples(synset.id)), " </Synset> ");
  return parts.join("");
}

/**
 * Validates LMF data integrity: loads the original XML, rebuilds an XML
 * document from the database, optionally saves it, and compares the two.
 *
 * @param {object} o - Database accessor exposing getLexicons() and the getters used by `b`.
 * @param {string} d - Path of the original XML file, passed to `g.loadFile`.
 * @param {object} [e] - Options.
 * @param {boolean} [e.outputReconstructed=true] - Save the reconstructed XML via `g.saveFile`.
 * @param {string} [e.outputPath] - Target path; falls back to `g.generateOutputPath(d)`.
 * @param {boolean} [e.ignoreWhitespace=true] - Forwarded to the comparison.
 * @param {boolean} [e.ignoreOrder=true] - Forwarded to the comparison (not read by it here).
 * @param {boolean} [e.detailedDiff=true] - Log up to the first ten differences.
 * @returns {Promise<{success: boolean, originalFile: string, reconstructedFile: (string|undefined), differences: object[], summary: object}>}
 * @throws {Error} "LMF validation failed: ..." wrapping any failure; the original error is kept as `cause`.
 */
async function L(o, d, e = {}) {
  const {
    outputReconstructed = true,
    outputPath,
    ignoreWhitespace = true,
    ignoreOrder = true,
    detailedDiff = true,
  } = e;

  try {
    const originalXml = await g.loadFile(d);
    console.log("🔍 Starting LMF data integrity validation");
    console.log(`📄 Original XML: ${d} (${originalXml.length} characters)`);

    const lexicons = await o.getLexicons();
    console.log(`📚 Found ${lexicons.length} lexicons in database`);

    const reconstructedXml = await b(o, lexicons);
    console.log(`🔄 Reconstructed XML (${reconstructedXml.length} characters)`);

    let reconstructedFile;
    if (outputReconstructed) {
      reconstructedFile = outputPath || g.generateOutputPath(d);
      await g.saveFile(reconstructedFile, reconstructedXml);
      console.log(`💾 Saved reconstructed XML to: ${reconstructedFile}`);
    }

    const differences = await x(originalXml, reconstructedXml, {
      ignoreWhitespace,
      ignoreOrder,
      detailedDiff,
    });
    const summary = w(differences, originalXml, reconstructedXml);

    if (differences.length === 0) {
      console.log("✅ Validation PASSED - No differences found!");
    } else {
      console.log(`⚠️ Validation FAILED - Found ${differences.length} differences`);
      if (detailedDiff) {
        differences.slice(0, 10).forEach((difference, index) => {
          console.log(
            ` ${index + 1}. ${difference.type}: ${difference.path} - ${difference.details}`,
          );
        });
        if (differences.length > 10) {
          console.log(` ... \nand ${differences.length - 10} more differences`);
        }
      }
    }

    return {
      success: differences.length === 0,
      originalFile: d,
      reconstructedFile,
      differences,
      summary,
    };
  } catch (error) {
    console.error(`❌ Validation failed with error: ${error}`);
    // Keep the original error (and its stack) reachable for callers.
    throw new Error(`LMF validation failed: ${error}`, { cause: error });
  }
}

/**
 * Rebuilds a WN-LMF XML document from database rows. All interpolated values
 * are XML-escaped. Database getters are awaited one at a time, in document order.
 *
 * @param {object} o - Database accessor (getWords, getSenses, getSynsets, ...).
 * @param {object[]} d - Lexicon rows to serialize.
 * @returns {Promise<string>} The reconstructed XML document.
 */
async function b(o, d) {
  const parts = [
    `<?xml version="1.0" encoding="UTF-8"?> `,
    `<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd"> `,
    `<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/"> `,
  ];

  for (const lexicon of d) {
    parts.push(
      ` <Lexicon${attr("id", lexicon.id)}${attr("label", lexicon.label)}${attr("language", lexicon.language)}`,
      attr("email", lexicon.email || "maintainer@example.com"),
      attr("license", lexicon.license || "https://creativecommons.org/licenses/by/4.0/"),
      attr("version", lexicon.version || "1"),
      optionalAttr("url", lexicon.url),
      optionalAttr("citation", lexicon.citation),
      metadataAttrs(lexicon),
      "> ",
    );

    for (const entry of await o.getWords(lexicon.id)) {
      parts.push(await renderEntry(o, entry));
    }

    for (const synset of await o.getSynsets(lexicon.id)) {
      parts.push(await renderSynset(o, synset));
    }

    parts.push(" </Lexicon> ");
  }

  parts.push("</LexicalResource>");
  return parts.join("");
}

/**
 * Compares two XML strings. With `ignoreWhitespace`, whitespace-normalized
 * equality short-circuits to "no differences"; otherwise only the number of
 * tags is compared.
 *
 * @param {string} o - Original XML.
 * @param {string} d - Reconstructed XML.
 * @param {{ignoreWhitespace?: boolean}} e - Comparison options.
 * @returns {Promise<object[]>} Difference records (empty when nothing differs).
 */
async function x(o, d, e) {
  const differences = [];
  try {
    if (e.ignoreWhitespace && h(o) === h(d)) {
      return differences;
    }
    const originalCount = p(o);
    const reconstructedCount = p(d);
    if (originalCount !== reconstructedCount) {
      differences.push({
        type: "structural_difference",
        path: "root",
        original: `Elements: ${originalCount}`,
        reconstructed: `Elements: ${reconstructedCount}`,
        details: `Element count mismatch: original has ${originalCount}, reconstructed has ${reconstructedCount}`,
      });
    }
  } catch (error) {
    // A comparison failure is reported as a difference instead of being thrown.
    differences.push({
      type: "structural_difference",
      path: "comparison",
      details: `Failed to compare XML: ${error}`,
    });
  }
  return differences;
}

/** Collapses whitespace runs to one space, removes whitespace between tags, and trims. */
function h(o) {
  return o.replace(/\s+/g, " ").replace(/>\s+</g, "><").trim();
}

/** Counts tags (opening, closing and self-closing), skipping `<?...>` and `<!...>` constructs. */
function p(o) {
  const tags = o.match(/<[^!?][^>]*>/g);
  return tags ? tags.length : 0;
}

/**
 * Builds summary statistics from the difference list.
 *
 * @param {object[]} o - Difference records.
 * @param {string} d - Original XML (source of the total tag count).
 * @param {string} e - Reconstructed XML (currently unused).
 * @returns {{totalElements: number, matchingElements: number, missingElements: number, extraElements: number, attributeMismatches: number}}
 */
function w(o, d, e) {
  const countOfType = (type) => o.filter((difference) => difference.type === type).length;
  const totalElements = p(d);
  const missingElements = countOfType("missing_element");
  return {
    totalElements,
    matchingElements: totalElements - missingElements,
    missingElements,
    extraElements: countOfType("extra_element"),
    attributeMismatches: countOfType("attribute_mismatch"),
  };
}

/** Placeholder file loader; always rejects until replaced on `g`. */
async function v(o) {
  throw new Error("loadFile not implemented - must be provided by framework");
}

/** Placeholder file writer; always rejects until replaced on `g`. */
async function S(o, d) {
  throw new Error("saveFile not implemented - must be provided by framework");
}

/** Placeholder output-path generator; always throws until replaced on `g`. */
function E(o) {
  throw new Error("generateOutputPath not implemented - must be provided by framework");
}

/** File-operation hooks used by `L`; the defaults all throw. */
const g = { loadFile: v, saveFile: S, generateOutputPath: E };

export { g as f, L as v };