UNPKG

bible-checker

Version:

A Bible tool that runs several checks on a target translation against a source translation

284 lines (272 loc) 9.38 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.checkChapterVerseIntegrity = checkChapterVerseIntegrity; exports.detectShortLongVerses = detectShortLongVerses; exports.detectUnmatchedPunctuation = detectUnmatchedPunctuation; exports.extractVerses = extractVerses; /** * Extracts verses from the USJ JSON format. * @param {object} text - Parsed JSON object of the text. * @returns {object} Map of verse IDs to their content. */ function extractVerses(text) { const verses = {}; function traverseContent(content) { for (const item of content) { if (item.type === 'verse' && item.sid) { verses[item.sid] = ''; } else if (typeof item === 'string') { const lastVerseKey = Object.keys(verses).pop(); if (lastVerseKey) { verses[lastVerseKey] += item; } } else if (item.content) { traverseContent(item.content); } } } traverseContent(text.content); return verses; } /** * Extracts chapter and verse numbers from USJ content. * @param {array} content - Content array of a USJ file. * @returns {object} Map of chapters to arrays of verses. */ function extractChapterVerses(content) { const chapters = {}; function traverse(items, currentChapter = null) { for (const item of items) { if (item.type === "chapter" && item.number) { currentChapter = parseInt(item.number, 10); if (!chapters[currentChapter]) { chapters[currentChapter] = []; } } else if (item.type === "verse" && item.number) { const verse = parseInt(item.number, 10); if (currentChapter) { chapters[currentChapter].push(verse); } } else if (item.content) { traverse(item.content, currentChapter); } } } traverse(content); return chapters; } /** * Detects short, long, or empty verses based on length comparison between source and target. * @param {object} source - Parsed JSON object of the source text. * @param {object} target - Parsed JSON object of the target text. * @param {number} threshold - Percentage difference to consider (default: 20%). 
* @returns {object} Report of short/long verses and empty verses. */ function detectShortLongVerses(source, target, threshold = 20) { const issues = []; const sourceVerses = extractVerses(source); const targetVerses = extractVerses(target); for (const [key, sourceText] of Object.entries(sourceVerses)) { const targetText = targetVerses[key] || ''; const sourceLength = sourceText.trim().length; const targetLength = targetText.trim().length; // Detect empty source or target verses if (sourceLength === 0 && targetLength > 0) { issues.push({ source_verse: key, source_length: sourceLength, target_length: targetLength, difference: null, comment: 'Source verse is empty, but target contains text.' }); } else if (sourceLength > 0 && targetLength === 0) { issues.push({ source_verse: key, source_length: sourceLength, target_length: targetLength, difference: null, comment: 'Target verse is empty, but source contains text.' }); } else if (sourceLength > 0 && targetLength > 0) { // Detect short or long verses const diffPercentage = (targetLength - sourceLength) / sourceLength * 100; if (Math.abs(diffPercentage) > threshold) { issues.push({ source_verse: key, source_length: sourceLength, target_length: targetLength, difference: parseFloat(diffPercentage.toFixed(2)), comment: diffPercentage > 0 ? 'Target verse is too long compared to source.' : 'Target verse is too short compared to source.' }); } } } return { check: 'short_long_verses', issues }; } /** * Checks for missing, duplicated, or out-of-order chapter/verse numbers. * @param {object} source - Parsed JSON object of the source text. * @param {object} target - Parsed JSON object of the target text. * @returns {object} Report of chapter/verse integrity issues. 
*/ function checkChapterVerseIntegrity(source, target) { const issues = []; const sourceChapterVerses = extractChapterVerses(source.content); const targetChapterVerses = extractChapterVerses(target.content); // Helper to validate order and duplication function validateIntegrity(chapterVerses, textType) { const seen = new Set(); let lastChapter = 0; let lastVerse = 0; for (let [chapter, verses] of Object.entries(chapterVerses)) { chapter = parseInt(chapter, 10); if (chapter < lastChapter) { issues.push({ type: "out_of_order", chapter, message: `${textType} has out-of-order chapter ${chapter}.` }); } lastChapter = chapter; for (const verse of verses) { if (verse < lastVerse) { console.log("verse ", verse); issues.push({ type: "out_of_order", chapter, verse, message: `${textType} has out-of-order verse ${verse} in chapter ${chapter}.` }); } if (seen.has(`${chapter}:${verse}`)) { issues.push({ type: "duplicate", chapter, verse, message: `${textType} has duplicate verse ${verse} in chapter ${chapter}.` }); } seen.add(`${chapter}:${verse}`); lastVerse = verse; } } } // Validate both source and target validateIntegrity(sourceChapterVerses, "Source"); validateIntegrity(targetChapterVerses, "Target"); // Detect missing verses in target for (const [chapter, verses] of Object.entries(sourceChapterVerses)) { const targetVerses = targetChapterVerses[chapter] || []; const missingVerses = verses.filter(verse => !targetVerses.includes(verse)); for (const missingVerse of missingVerses) { issues.push({ type: "missing", chapter: parseInt(chapter, 10), verse: missingVerse, message: `Target is missing verse ${missingVerse} in chapter ${chapter}.` }); } } return { check: "chapter_verse_integrity", issues }; } /** * Detects unmatched punctuation pairs across verses (e.g., quotes, parentheses). * @param {object} target - Parsed JSON object of the target text. * @param {object|null} pair_punctuation_list - Optional custom punctuation pairs. 
* @returns {object} Report of unmatched punctuation issues. */ function detectUnmatchedPunctuation(target, pair_punctuation_list = null) { const issues = []; const targetVerses = extractVerses(target); // Define default punctuation pairs or use provided ones let PAIR_PUNCTUATION = { '(': ')', '[': ']', '{': '}', '"': '"' // "'": "'", }; if (pair_punctuation_list !== null) { PAIR_PUNCTUATION = pair_punctuation_list; } let stack = []; // Shared stack for punctuation tracking let toggles = {}; // Toggles for characters that are the same for opening and closing let openVerse = null; // Keeps track of the verse where punctuation started // Initialize toggles for symmetric punctuation for (const char of Object.keys(PAIR_PUNCTUATION)) { if (PAIR_PUNCTUATION[char] === char) { // false means "not inside" toggles[char] = false; } } let yeet = 0; for (const [key, text] of Object.entries(targetVerses)) { for (const char of text) { if (PAIR_PUNCTUATION[char]) { if (PAIR_PUNCTUATION[char] === char) { // Handle symmetric punctuation using toggles toggles[char] = !toggles[char]; if (toggles[char]) { // Entering a symmetric punctuation if (stack.length === 0) openVerse = key; stack.push(char); } else { // Exiting a symmetric punctuation const last = stack.pop(); if (!last || last !== char) { issues.push({ verse: key, unmatched_punctuation: char, comment: `Unmatched closing punctuation: ${char}` }); } } } else { // Handle asymmetric punctuation (e.g., (), {}, etc.) 
if (stack.length === 0) { openVerse = key; } stack.push(char); } } else if (Object.values(PAIR_PUNCTUATION).includes(char)) { // Handle closing punctuation const last = stack.pop(); if (!last || PAIR_PUNCTUATION[last] !== char) { // Unmatched closing punctuation issues.push({ verse: key, unmatched_punctuation: char, comment: `Unmatched closing punctuation: ${char}` }); } } } } // Remaining unmatched opening punctuation in the stack if (stack.length > 0) { const unmatchedSet = new Set(); while (stack.length > 0) { const unmatched = stack.pop(); if (!unmatchedSet.has(unmatched.char)) { unmatchedSet.add(unmatched.char); issues.push({ verse: openVerse, unmatched_punctuation: unmatched.char, comment: `Unmatched opening punctuation: ${unmatched.char}` }); } } } return { check: 'unmatched_punctuation', issues }; }