wn-ts-node
Version:
WordNet interface library - TypeScript port
181 lines (180 loc) • 11.4 kB
JavaScript
/**
 * Check that the LMF data held in a database can be turned back into the XML
 * document it was loaded from.
 *
 * Steps, in order: load the original XML through `g.loadFile`, fetch the
 * lexicons with `db.getLexicons()`, rebuild an XML document with `b`,
 * optionally save it through `g.saveFile`, compare both documents with `x`,
 * and summarise the result with `w`. Progress is written to the console.
 *
 * @param {object} db - Data source; must provide `getLexicons()` plus every
 *   accessor that `b` calls.
 * @param {string} originalPath - Identifier handed to `g.loadFile` and, when no
 *   `outputPath` is given, to `g.generateOutputPath`.
 * @param {object} [options]
 * @param {boolean} [options.outputReconstructed=true] - Save the rebuilt XML.
 * @param {string} [options.outputPath] - Destination for the rebuilt XML; when
 *   falsy, `g.generateOutputPath(originalPath)` is used instead.
 * @param {boolean} [options.ignoreWhitespace=true] - Forwarded to `x`.
 * @param {boolean} [options.ignoreOrder=true] - Forwarded to `x`.
 * @param {boolean} [options.detailedDiff=true] - Forwarded to `x`; also enables
 *   the per-difference console listing (first 10 entries).
 * @returns {Promise<{success: boolean, originalFile: string,
 *   reconstructedFile: (string|undefined), differences: object[],
 *   summary: object}>} `reconstructedFile` is `undefined` when nothing was saved.
 * @throws {Error} "LMF validation failed: …" wrapping any failure raised inside
 *   the workflow; the underlying error is available as `error.cause`.
 */
async function L(db, originalPath, options = {}) {
  const {
    outputReconstructed = true,
    outputPath,
    ignoreWhitespace = true,
    ignoreOrder = true,
    detailedDiff = true
  } = options;
  // Upper bound on the number of differences echoed to the console.
  const maxListed = 10;

  try {
    const originalXml = await g.loadFile(originalPath);
    console.log("🔍 Starting LMF data integrity validation");
    console.log(`📄 Original XML: ${originalPath} (${originalXml.length} characters)`);

    const lexicons = await db.getLexicons();
    console.log(`📚 Found ${lexicons.length} lexicons in database`);

    const reconstructedXml = await b(db, lexicons);
    console.log(`🔄 Reconstructed XML (${reconstructedXml.length} characters)`);

    let reconstructedFile;
    if (outputReconstructed) {
      reconstructedFile = outputPath || g.generateOutputPath(originalPath);
      await g.saveFile(reconstructedFile, reconstructedXml);
      console.log(`💾 Saved reconstructed XML to: ${reconstructedFile}`);
    }

    const differences = await x(originalXml, reconstructedXml, {
      ignoreWhitespace,
      ignoreOrder,
      detailedDiff
    });
    const summary = w(differences, originalXml, reconstructedXml);

    if (differences.length === 0) {
      console.log("✅ Validation PASSED - No differences found!");
    } else {
      console.log(`⚠️ Validation FAILED - Found ${differences.length} differences`);
      if (detailedDiff) {
        differences.slice(0, maxListed).forEach((difference, index) => {
          console.log(` ${index + 1}. ${difference.type}: ${difference.path} - ${difference.details}`);
        });
        if (differences.length > maxListed) {
          console.log(` ... and ${differences.length - maxListed} more differences`);
        }
      }
    }

    return {
      success: differences.length === 0,
      originalFile: originalPath,
      reconstructedFile,
      differences,
      summary
    };
  } catch (error) {
    console.error(`❌ Validation failed with error: ${error}`);
    // Keep the original error (and its stack) reachable instead of reducing it
    // to a string inside the message.
    throw new Error(`LMF validation failed: ${error}`, { cause: error });
  }
}
/**
 * Record properties that are written as optional XML attributes on Lexicon,
 * LexicalEntry, Sense, SyntacticBehaviour and Synset elements, as
 * [record property, XML attribute name] pairs in emission order.
 */
const LMF_METADATA_ATTRIBUTES = [
  ["dc_contributor", "dc:contributor"],
  ["dc_coverage", "dc:coverage"],
  ["dc_creator", "dc:creator"],
  ["dc_date", "dc:date"],
  ["dc_description", "dc:description"],
  ["dc_format", "dc:format"],
  ["dc_identifier", "dc:identifier"],
  ["dc_publisher", "dc:publisher"],
  ["dc_relation", "dc:relation"],
  ["dc_rights", "dc:rights"],
  ["dc_source", "dc:source"],
  ["dc_subject", "dc:subject"],
  ["dc_title", "dc:title"],
  ["dc_type", "dc:type"],
  ["status", "status"],
  ["note", "note"],
  ["confidence_score", "confidenceScore"]
];

/** Escape a value for use as XML character data (`&`, `<`, `>`). */
function escapeXmlText(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/** Escape a value for use inside a double-quoted XML attribute. */
function escapeXmlAttribute(value) {
  return escapeXmlText(value).replace(/"/g, "&quot;");
}

/** Render ` name="value"` (leading space included) with the value escaped. */
function formatXmlAttribute(name, value) {
  return ` ${name}="${escapeXmlAttribute(value)}"`;
}

/** Render an attribute only when its value is truthy; otherwise return "". */
function formatOptionalXmlAttribute(name, value) {
  // Truthiness (not a null check) is the original rule: "" and 0 are omitted.
  return value ? formatXmlAttribute(name, value) : "";
}

/** Render every metadata attribute of LMF_METADATA_ATTRIBUTES set on `record`. */
function formatMetadataAttributes(record) {
  return LMF_METADATA_ATTRIBUTES
    .map(([property, attribute]) => formatOptionalXmlAttribute(attribute, record[property]))
    .join("");
}

/** Render one `<Tag>` line for a lemma or form tag record. */
function formatTagElement(tag) {
  return ` <Tag${formatXmlAttribute("category", tag.category)}>${escapeXmlText(tag.text)}</Tag>\n`;
}

/** Render one `<Example>` line for a sense or synset example record. */
function formatExampleElement(example) {
  return ` <Example${formatOptionalXmlAttribute("language", example.language)}>${escapeXmlText(example.text)}</Example>\n`;
}

/**
 * Rebuild a `LexicalResource` XML document from the records a database
 * returns for the given lexicons.
 *
 * For each lexicon it emits a `<Lexicon>` element, then every word from
 * `db.getWords` as a `<LexicalEntry>` (Lemma with tags, Forms with tags,
 * Senses with relations/examples/counts, SyntacticBehaviours), then every
 * synset from `db.getSynsets` as a `<Synset>` (Definitions, ILIDefinitions,
 * SynsetRelations, Examples). Element layout, attribute order and the
 * fallback values for `email`, `license` and `version` are unchanged from the
 * previous implementation; the difference is that attribute values and
 * element text are now XML-escaped, so data containing `&`, `<`, `>` or `"`
 * no longer yields malformed XML.
 *
 * Database calls are awaited one at a time, in document order.
 *
 * @param {object} db - Must provide the async accessors getWords, getWordTags,
 *   getForms, getFormTags, getSenses, getSenseRelations, getSenseExamples,
 *   getSenseCounts, getSyntacticBehaviours, getSynsets, getDefinitions,
 *   getILIDefinitions, getSynsetRelations and getSynsetExamples.
 * @param {Iterable<object>} lexicons - Lexicon records (id, label, language, …).
 * @returns {Promise<string>} The XML document, without a trailing newline.
 */
async function b(db, lexicons) {
  const out = [
    '<?xml version="1.0" encoding="UTF-8"?>\n',
    '<!DOCTYPE LexicalResource SYSTEM "http://globalwordnet.github.io/schemas/WN-LMF-1.0.dtd">\n',
    '<LexicalResource xmlns:dc="http://purl.org/dc/elements/1.1/">\n'
  ];

  for (const lexicon of lexicons) {
    out.push(
      " <Lexicon",
      formatXmlAttribute("id", lexicon.id),
      formatXmlAttribute("label", lexicon.label),
      formatXmlAttribute("language", lexicon.language),
      formatXmlAttribute("email", lexicon.email || "maintainer@example.com"),
      formatXmlAttribute("license", lexicon.license || "https://creativecommons.org/licenses/by/4.0/"),
      formatXmlAttribute("version", lexicon.version || "1"),
      formatOptionalXmlAttribute("url", lexicon.url),
      formatOptionalXmlAttribute("citation", lexicon.citation),
      formatMetadataAttributes(lexicon),
      ">\n"
    );

    const words = await db.getWords(lexicon.id);
    for (const word of words) {
      out.push(
        " <LexicalEntry",
        formatXmlAttribute("id", word.id),
        formatMetadataAttributes(word),
        ">\n"
      );

      out.push(
        " <Lemma",
        formatXmlAttribute("partOfSpeech", word.pos),
        formatXmlAttribute("writtenForm", word.lemma),
        formatOptionalXmlAttribute("script", word.script),
        ">\n"
      );
      const wordTags = await db.getWordTags(word.id);
      for (const tag of wordTags) {
        out.push(formatTagElement(tag));
      }
      out.push(" </Lemma>\n");

      const forms = await db.getForms(word.id);
      for (const form of forms) {
        out.push(
          " <Form",
          formatXmlAttribute("writtenForm", form.written_form),
          formatOptionalXmlAttribute("script", form.script),
          ">\n"
        );
        const formTags = await db.getFormTags(form.id);
        for (const tag of formTags) {
          out.push(formatTagElement(tag));
        }
        out.push(" </Form>\n");
      }

      const senses = await db.getSenses(word.id);
      for (const sense of senses) {
        out.push(
          " <Sense",
          formatXmlAttribute("id", sense.id),
          formatXmlAttribute("synset", sense.synset_id),
          // Emitted whenever the property is defined, including `false`.
          sense.lexicalized !== undefined ? formatXmlAttribute("lexicalized", sense.lexicalized) : "",
          formatOptionalXmlAttribute("adjposition", sense.adjposition),
          formatMetadataAttributes(sense),
          ">\n"
        );

        const senseRelations = await db.getSenseRelations(sense.id);
        for (const relation of senseRelations) {
          out.push(
            " <SenseRelation",
            formatXmlAttribute("relType", relation.rel_type),
            formatXmlAttribute("target", relation.target),
            formatOptionalXmlAttribute("dc:type", relation.dc_type),
            " />\n"
          );
        }

        const senseExamples = await db.getSenseExamples(sense.id);
        for (const example of senseExamples) {
          out.push(formatExampleElement(example));
        }

        const senseCounts = await db.getSenseCounts(sense.id);
        for (const count of senseCounts) {
          out.push(
            " <Count",
            formatOptionalXmlAttribute("dc:source", count.dc_source),
            `>${escapeXmlText(count.value)}</Count>\n`
          );
        }

        out.push(" </Sense>\n");
      }

      const behaviours = await db.getSyntacticBehaviours(word.id);
      for (const behaviour of behaviours) {
        out.push(
          " <SyntacticBehaviour",
          formatXmlAttribute("senses", behaviour.senses),
          formatXmlAttribute("subcategorizationFrame", behaviour.subcategorization_frame),
          formatMetadataAttributes(behaviour),
          " />\n"
        );
      }

      out.push(" </LexicalEntry>\n");
    }

    const synsets = await db.getSynsets(lexicon.id);
    for (const synset of synsets) {
      out.push(
        " <Synset",
        formatXmlAttribute("id", synset.id),
        // `ili` is always written; a falsy value becomes an empty attribute.
        formatXmlAttribute("ili", synset.ili || ""),
        formatXmlAttribute("partOfSpeech", synset.pos),
        synset.lexicalized !== undefined ? formatXmlAttribute("lexicalized", synset.lexicalized) : "",
        formatMetadataAttributes(synset),
        ">\n"
      );

      const definitions = await db.getDefinitions(synset.id);
      for (const definition of definitions) {
        out.push(
          " <Definition",
          formatOptionalXmlAttribute("language", definition.language),
          formatOptionalXmlAttribute("sourceSense", definition.source_sense),
          `>${escapeXmlText(definition.text)}</Definition>\n`
        );
      }

      const iliDefinitions = await db.getILIDefinitions(synset.id);
      for (const iliDefinition of iliDefinitions) {
        out.push(` <ILIDefinition>${escapeXmlText(iliDefinition.text)}</ILIDefinition>\n`);
      }

      const synsetRelations = await db.getSynsetRelations(synset.id);
      for (const relation of synsetRelations) {
        out.push(
          " <SynsetRelation",
          formatXmlAttribute("relType", relation.rel_type),
          formatXmlAttribute("target", relation.target),
          " />\n"
        );
      }

      const synsetExamples = await db.getSynsetExamples(synset.id);
      for (const example of synsetExamples) {
        out.push(formatExampleElement(example));
      }

      out.push(" </Synset>\n");
    }

    out.push(" </Lexicon>\n");
  }

  out.push("</LexicalResource>");
  return out.join("");
}
/**
 * Compare an original XML string with a reconstructed one and list the
 * differences found.
 *
 * When `options.ignoreWhitespace` is truthy and the two strings are equal
 * after normalisation by `h`, the documents are treated as identical.
 * Otherwise only the counts returned by `p` are compared, and a single
 * "structural_difference" entry is produced when they differ.
 *
 * NOTE(review): documents whose counts match but whose content differs yield
 * an empty list. `options.ignoreOrder` and `options.detailedDiff` are not read
 * by this function.
 *
 * Any exception raised during the comparison is reported as a
 * "structural_difference" entry with path "comparison" instead of propagating.
 *
 * @param {string} originalXml
 * @param {string} reconstructedXml
 * @param {{ignoreWhitespace?: boolean}} options
 * @returns {Promise<object[]>} Difference records; empty when none were found.
 */
async function x(originalXml, reconstructedXml, options) {
  const differences = [];

  try {
    const equalAfterNormalising =
      options.ignoreWhitespace && h(originalXml) === h(reconstructedXml);

    if (!equalAfterNormalising) {
      const originalCount = p(originalXml);
      const reconstructedCount = p(reconstructedXml);

      if (originalCount !== reconstructedCount) {
        differences.push({
          type: "structural_difference",
          path: "root",
          original: `Elements: ${originalCount}`,
          reconstructed: `Elements: ${reconstructedCount}`,
          details: `Element count mismatch: original has ${originalCount}, reconstructed has ${reconstructedCount}`
        });
      }
    }
  } catch (error) {
    differences.push({
      type: "structural_difference",
      path: "comparison",
      details: `Failed to compare XML: ${error}`
    });
  }

  return differences;
}
/**
 * Normalise an XML string for whitespace-insensitive comparison.
 *
 * Every run of whitespace is collapsed to a single space, whitespace that
 * sits between a closing `>` and the next `<` is removed, and the result is
 * trimmed at both ends.
 *
 * @param {string} xml
 * @returns {string}
 */
function h(xml) {
  const singleSpaced = xml.replace(/\s+/g, " ");
  const tagsAdjoined = singleSpaced.replace(/>\s+</g, "><");
  return tagsAdjoined.trim();
}
/**
 * Count the elements in an XML string.
 *
 * Each start tag or self-closing tag counts once. Closing tags (`</…>`),
 * comments and declarations (`<!…>`) and processing instructions (`<?…>`) are
 * skipped. Skipping closing tags makes `<e/>` and `<e></e>` count the same, so
 * two serialisations of one element no longer register as a count mismatch.
 *
 * This is a regex scan, not a parse: a literal `<…>` inside text or a comment
 * body is still counted.
 *
 * @param {string} xml
 * @returns {number} Number of elements found; 0 when there are none.
 */
function p(xml) {
  const startTags = xml.match(/<[^!?\/][^>]*>/g);
  return startTags ? startTags.length : 0;
}
/**
 * Build the numeric summary of a comparison.
 *
 * `totalElements` is `p(originalXml)`. The three per-type figures count the
 * difference records whose `type` is "missing_element", "extra_element" or
 * "attribute_mismatch"; records of any other type are ignored.
 * `matchingElements` is `totalElements` minus the missing-element count.
 *
 * @param {object[]} differences - Difference records carrying a `type`.
 * @param {string} originalXml - Original document, measured with `p`.
 * @param {string} reconstructedXml - Accepted but not used.
 * @returns {{totalElements: number, matchingElements: number,
 *   missingElements: number, extraElements: number,
 *   attributeMismatches: number}}
 */
function w(differences, originalXml, reconstructedXml) {
  const totalElements = p(originalXml);

  const tally = differences.reduce(
    (counts, difference) => {
      switch (difference.type) {
        case "missing_element":
          counts.missing += 1;
          break;
        case "extra_element":
          counts.extra += 1;
          break;
        case "attribute_mismatch":
          counts.attribute += 1;
          break;
        default:
          break;
      }
      return counts;
    },
    { missing: 0, extra: 0, attribute: 0 }
  );

  return {
    totalElements,
    matchingElements: totalElements - tally.missing,
    missingElements: tally.missing,
    extraElements: tally.extra,
    attributeMismatches: tally.attribute
  };
}
/**
 * Placeholder for the `loadFile` hook: always rejects.
 *
 * It is the initial value of `g.loadFile`; a working implementation has to be
 * assigned there before the validator can load anything.
 *
 * @param {string} filePath - Ignored by this placeholder.
 * @returns {Promise<never>} Rejects with an Error saying the hook is missing.
 */
async function v(filePath) {
  const reason = "loadFile not implemented - must be provided by framework";
  throw new Error(reason);
}
/**
 * Placeholder for the `saveFile` hook: always rejects.
 *
 * It is the initial value of `g.saveFile`; a working implementation has to be
 * assigned there before reconstructed XML can be written.
 *
 * @param {string} filePath - Ignored by this placeholder.
 * @param {string} content - Ignored by this placeholder.
 * @returns {Promise<never>} Rejects with an Error saying the hook is missing.
 */
async function S(filePath, content) {
  const reason = "saveFile not implemented - must be provided by framework";
  throw new Error(reason);
}
/**
 * Placeholder for the `generateOutputPath` hook: always throws, synchronously.
 *
 * It is the initial value of `g.generateOutputPath`; a working implementation
 * has to be assigned there before an output path can be derived.
 *
 * @param {string} originalPath - Ignored by this placeholder.
 * @returns {never}
 * @throws {Error} Always, saying the hook is missing.
 */
function E(originalPath) {
  const reason = "generateOutputPath not implemented - must be provided by framework";
  throw new Error(reason);
}
/**
 * Replaceable file-system hooks used by the validator `L`.
 *
 * `L` looks up `loadFile`, `saveFile` and `generateOutputPath` on this object
 * each time it runs, so assigning a new function to a property changes what
 * `L` calls. All three start out as placeholders that fail with a
 * "not implemented" error.
 */
const g = { loadFile: v, saveFile: S, generateOutputPath: E };
// Module exports, under aliased names:
//   f - the `g` hook object (loadFile / saveFile / generateOutputPath)
//   v - the `L` validation function. Note: this is NOT the local function `v`
//       (the loadFile placeholder), which is only reachable through `g.loadFile`.
export {
g as f,
L as v
};