twl-linker
Version:
Biblical Semantic Linker - Uses the biblical context database to create semantic links between USFM Bible text and biblical articles with confidence scoring.
32 lines (23 loc) • 1.25 kB
JavaScript
/* eslint-disable no-async-promise-executor, no-throw-literal */
// Note: This version does not use usfm-js, to avoid an external dependency.
// Instead, it implements a simple regex-based USFM alignment remover that only covers the marker patterns handled below.
/**
 * Strips alignment markup from USFM content while preserving the text.
 *
 * Applied in order:
 *  1. word markers: `\w word|attrs\w*` and attribute-less `\w word\w*` become `word`
 *  2. `\zaln-s ...\*` / `\zaln-e\*` milestones are removed
 *  3. `\k-s ...\*` / `\k-e\*` milestones are removed
 *  4. runs of three or more line breaks (optionally separated by whitespace)
 *     collapse to a single blank line
 *  5. any leftover `|...` run that is followed by another marker is removed
 *
 * This is a regex-based approximation, not a full USFM parser. Note that
 * step 5 also removes a literal `|` and the text after it when a backslash
 * marker follows later in the content.
 *
 * @param {string} usfmContent - The USFM content with potential alignment data
 * @return {string} - Trimmed USFM content without alignment markers ('' for falsy input)
 */
export const removeAlignments = (usfmContent) => {
  if (!usfmContent) return '';

  return (
    usfmContent
      // The attribute section is optional so that plain `\w word\w*` is unwrapped too.
      .replace(/\\w\s+([^|\\]+)(?:\|[^\\]*)?\\w\*/g, '$1')
      // `[^\\]*` consumes the milestone attributes; the `\*` terminator is matched
      // as one optional unit so the backslash of a following marker is never eaten.
      .replace(/\\zaln-[se][^\\]*(?:\\\*)?/g, '')
      .replace(/\\k-[se][^\\]*(?:\\\*)?/g, '')
      // Collapse the blank lines left behind by the removals above.
      .replace(/\n\s*\n\s*\n/g, '\n\n')
      // Drop any remaining `|attrs` fragment that sits directly before another marker.
      .replace(/\|[^\\]*(?=\\)/g, '')
      .trim()
  );
};