bible-checker
Version:
A bible tool to run several checks with a target translation and a source translation
284 lines (272 loc) • 9.38 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.checkChapterVerseIntegrity = checkChapterVerseIntegrity;
exports.detectShortLongVerses = detectShortLongVerses;
exports.detectUnmatchedPunctuation = detectUnmatchedPunctuation;
exports.extractVerses = extractVerses;
/**
 * Extracts verse text from a parsed USJ document.
 *
 * Walks `text.content` depth-first in document order. An object item whose
 * `type` is 'verse' and whose `sid` is truthy starts a new verse: an empty
 * entry keyed by that `sid` is created (a repeated `sid` resets its entry).
 * Every string item met afterwards is appended to the verse started most
 * recently. Strings that occur before the first verse marker are ignored.
 *
 * @param {object} text - Parsed JSON object of the text; its `content` is traversed.
 * @returns {object} Map of verse IDs (`sid`) to their concatenated text.
 */
function extractVerses(text) {
  const verses = {};
  // Key of the verse currently collecting text. Tracking it explicitly keeps
  // the walk linear and independent of object key ordering (integer-like keys
  // and re-used keys do not enumerate in document order).
  let currentVerseKey = null;

  function traverseContent(content) {
    for (const item of content) {
      if (typeof item === 'string') {
        if (currentVerseKey !== null) {
          verses[currentVerseKey] += item;
        }
        continue;
      }
      if (item === null || typeof item !== 'object') {
        // JSON arrays may hold null/number/boolean entries; they carry no text.
        continue;
      }
      if (item.type === 'verse' && item.sid) {
        currentVerseKey = item.sid;
        verses[currentVerseKey] = '';
      } else if (item.content) {
        traverseContent(item.content);
      }
    }
  }

  traverseContent(text.content);
  return verses;
}
/**
 * Extracts chapter and verse numbers from USJ content.
 *
 * Walks `content` depth-first in document order. An item with
 * `type === "chapter"` and a truthy `number` becomes the current chapter
 * (parsed with `parseInt(..., 10)`) and gets an entry in the result. An item
 * with `type === "verse"` and a truthy `number` is parsed the same way and
 * appended to the current chapter's list. Verses met while no chapter is
 * current (or while the parsed chapter is 0 or NaN) are not recorded.
 *
 * @param {array} content - Content array of a USJ file.
 * @returns {object} Map of chapter numbers to arrays of verse numbers, in encounter order.
 */
function extractChapterVerses(content) {
  const chapters = {};
  // Kept in closure scope rather than passed down the recursion, so a chapter
  // marker found inside a nested container still applies to what follows it.
  let currentChapter = null;

  function traverse(items) {
    for (const item of items) {
      if (item === null || typeof item !== 'object') {
        // Plain strings and other primitives carry no chapter/verse markers.
        continue;
      }
      if (item.type === "chapter" && item.number) {
        currentChapter = parseInt(item.number, 10);
        if (!chapters[currentChapter]) {
          chapters[currentChapter] = [];
        }
      } else if (item.type === "verse" && item.number) {
        if (currentChapter) {
          chapters[currentChapter].push(parseInt(item.number, 10));
        }
      } else if (item.content) {
        traverse(item.content);
      }
    }
  }

  traverse(content);
  return chapters;
}
/**
 * Compares trimmed verse lengths between a source and a target text.
 *
 * Only verses present in the source are examined; a verse absent from the
 * target is treated as an empty string. A verse is reported when exactly one
 * side is empty, or when both sides have text and the target length deviates
 * from the source length by more than `threshold` percent.
 *
 * @param {object} source - Parsed JSON object of the source text.
 * @param {object} target - Parsed JSON object of the target text.
 * @param {number} threshold - Percentage difference to consider (default: 20%).
 * @returns {object} Report with `check: 'short_long_verses'` and the list of issues.
 */
function detectShortLongVerses(source, target, threshold = 20) {
  const versesInSource = extractVerses(source);
  const versesInTarget = extractVerses(target);
  const issues = [];

  // Appends one finding; field order matches the report format.
  const report = (verseId, srcLen, tgtLen, difference, comment) => {
    issues.push({
      source_verse: verseId,
      source_length: srcLen,
      target_length: tgtLen,
      difference,
      comment
    });
  };

  Object.keys(versesInSource).forEach(verseId => {
    const srcLen = versesInSource[verseId].trim().length;
    const tgtLen = (versesInTarget[verseId] || '').trim().length;

    if (srcLen === 0) {
      // Nothing to compare against; only worth reporting if the target has text.
      if (tgtLen > 0) {
        report(verseId, srcLen, tgtLen, null, 'Source verse is empty, but target contains text.');
      }
      return;
    }

    if (tgtLen === 0) {
      report(verseId, srcLen, tgtLen, null, 'Target verse is empty, but source contains text.');
      return;
    }

    // Both sides have text: signed relative length difference in percent.
    const diffPercentage = (tgtLen - srcLen) / srcLen * 100;
    if (Math.abs(diffPercentage) <= threshold) {
      return;
    }

    let comment;
    if (diffPercentage > 0) {
      comment = 'Target verse is too long compared to source.';
    } else {
      comment = 'Target verse is too short compared to source.';
    }
    report(verseId, srcLen, tgtLen, parseFloat(diffPercentage.toFixed(2)), comment);
  });

  return {
    check: 'short_long_verses',
    issues
  };
}
/**
 * Checks for missing, duplicated, or out-of-order chapter/verse numbers.
 *
 * Source and target are each validated for out-of-order and duplicate verses,
 * then every verse number found in the source is looked up in the same
 * chapter of the target and reported as missing when absent.
 *
 * @param {object} source - Parsed JSON object of the source text; `source.content` is scanned.
 * @param {object} target - Parsed JSON object of the target text; `target.content` is scanned.
 * @returns {object} Report with `check: "chapter_verse_integrity"` and the list of issues.
 */
function checkChapterVerseIntegrity(source, target) {
  const issues = [];
  const sourceChapterVerses = extractChapterVerses(source.content);
  const targetChapterVerses = extractChapterVerses(target.content);

  // Records out-of-order and duplicate numbering for one text.
  function validateIntegrity(chapterVerses, textType) {
    const seen = new Set();
    let lastChapter = 0;
    // NOTE: Object.entries lists integer-like keys in ascending numeric order,
    // so with a plain-object chapter map the chapter-order check below cannot
    // observe the order chapters had in the document.
    for (const [chapterKey, verses] of Object.entries(chapterVerses)) {
      const chapter = parseInt(chapterKey, 10);
      if (chapter < lastChapter) {
        issues.push({
          type: "out_of_order",
          chapter,
          message: `${textType} has out-of-order chapter ${chapter}.`
        });
      }
      lastChapter = chapter;

      // Verse numbering restarts in every chapter, so the ordering baseline
      // must restart too; otherwise verse 1 of each later chapter is flagged.
      let lastVerse = 0;
      for (const verse of verses) {
        if (verse < lastVerse) {
          issues.push({
            type: "out_of_order",
            chapter,
            verse,
            message: `${textType} has out-of-order verse ${verse} in chapter ${chapter}.`
          });
        }
        const verseRef = `${chapter}:${verse}`;
        if (seen.has(verseRef)) {
          issues.push({
            type: "duplicate",
            chapter,
            verse,
            message: `${textType} has duplicate verse ${verse} in chapter ${chapter}.`
          });
        }
        seen.add(verseRef);
        lastVerse = verse;
      }
    }
  }

  // Validate both source and target
  validateIntegrity(sourceChapterVerses, "Source");
  validateIntegrity(targetChapterVerses, "Target");

  // Detect verses that the source has but the target lacks
  for (const [chapter, verses] of Object.entries(sourceChapterVerses)) {
    // Set lookup instead of scanning the target array once per source verse.
    const targetVerses = new Set(targetChapterVerses[chapter] || []);
    for (const verse of verses) {
      if (targetVerses.has(verse)) {
        continue;
      }
      issues.push({
        type: "missing",
        chapter: parseInt(chapter, 10),
        verse,
        message: `Target is missing verse ${verse} in chapter ${chapter}.`
      });
    }
  }

  return {
    check: "chapter_verse_integrity",
    issues
  };
}
/**
 * Detects unmatched punctuation pairs across verses (e.g., quotes, parentheses).
 *
 * All verses are scanned in order with a single stack, so a pair may open in
 * one verse and close in a later one. A closing character that does not match
 * the most recent opener is reported at the verse where it occurs (the opener
 * it was compared against is discarded). Openers still on the stack at the end
 * are reported once per distinct character, at the verse of the earliest
 * still-open occurrence.
 *
 * @param {object} target - Parsed JSON object of the target text.
 * @param {object|null} pair_punctuation_list - Optional custom punctuation pairs,
 *   as an object mapping each opening character to its closing character. A
 *   character mapped to itself is treated as a symmetric (toggle) delimiter.
 * @returns {object} Report with `check: 'unmatched_punctuation'` and the list of issues.
 */
function detectUnmatchedPunctuation(target, pair_punctuation_list = null) {
  const issues = [];
  const targetVerses = extractVerses(target);

  // Default pairs. The single quote is left out of the defaults
  // (presumably because apostrophes would register as unmatched - confirm).
  let PAIR_PUNCTUATION = {
    '(': ')',
    '[': ']',
    '{': '}',
    '"': '"'
  };
  if (pair_punctuation_list !== null) {
    PAIR_PUNCTUATION = pair_punctuation_list;
  }

  // Built once instead of on every character of every verse.
  const closingChars = new Set(Object.values(PAIR_PUNCTUATION));

  // Open delimiters, each remembered with the verse it was opened in.
  const stack = [];

  // For symmetric delimiters: true while "inside" the pair, false otherwise.
  const toggles = {};
  for (const char of Object.keys(PAIR_PUNCTUATION)) {
    if (PAIR_PUNCTUATION[char] === char) {
      toggles[char] = false;
    }
  }

  const reportUnmatchedClosing = (verse, char) => {
    issues.push({
      verse,
      unmatched_punctuation: char,
      comment: `Unmatched closing punctuation: ${char}`
    });
  };

  for (const [key, text] of Object.entries(targetVerses)) {
    for (const char of text) {
      if (PAIR_PUNCTUATION[char]) {
        if (PAIR_PUNCTUATION[char] === char) {
          // Symmetric delimiter: every occurrence flips between open and close.
          toggles[char] = !toggles[char];
          if (toggles[char]) {
            stack.push({ char, verse: key });
          } else {
            const last = stack.pop();
            if (!last || last.char !== char) {
              reportUnmatchedClosing(key, char);
            }
          }
        } else {
          // Asymmetric opener such as '(' or '{'.
          stack.push({ char, verse: key });
        }
      } else if (closingChars.has(char)) {
        // Asymmetric closer: must match the most recent opener.
        const last = stack.pop();
        if (!last || PAIR_PUNCTUATION[last.char] !== char) {
          reportUnmatchedClosing(key, char);
        }
      }
    }
  }

  // Whatever is left on the stack was opened but never closed.
  const reported = new Set();
  for (const { char, verse } of stack) {
    if (reported.has(char)) {
      continue;
    }
    reported.add(char);
    issues.push({
      verse,
      unmatched_punctuation: char,
      comment: `Unmatched opening punctuation: ${char}`
    });
  }

  return {
    check: 'unmatched_punctuation',
    issues
  };
}