UNPKG

pdfmake-rtl

Version:

Enhanced PDFMake with automatic RTL (Arabic/Persian/Urdu) support - Complete standalone package (beta)

541 lines (470 loc) 14.5 kB
'use strict'; /** * RTL (Right-to-Left) utilities for handling Arabic, Persian (Farsi), and Urdu languages */ // Unicode ranges for Arabic script (includes Persian and Urdu characters) var ARABIC_RANGE = [ [0x0600, 0x06FF], // Arabic block [0x0750, 0x077F], // Arabic Supplement [0x08A0, 0x08FF], // Arabic Extended-A [0xFB50, 0xFDFF], // Arabic Presentation Forms-A [0xFE70, 0xFEFF] // Arabic Presentation Forms-B ]; // Unicode ranges for Persian (Farsi) specific characters var PERSIAN_RANGE = [ [0x06A9, 0x06AF], // Persian Kaf, Gaf [0x06C0, 0x06C3], // Persian Heh, Teh Marbuta variants [0x06CC, 0x06CE], // Persian Yeh variants [0x06D0, 0x06D5], // Persian Yeh Barree, Arabic-Indic digits [0x200C, 0x200D] // Zero Width Non-Joiner, Zero Width Joiner (used in Persian) ]; // Unicode ranges for Urdu specific characters var URDU_RANGE = [ [0x0679, 0x0679], // Urdu Tteh [0x067E, 0x067E], // Urdu Peh [0x0686, 0x0686], // Urdu Tcheh [0x0688, 0x0688], // Urdu Ddal [0x0691, 0x0691], // Urdu Rreh [0x0698, 0x0698], // Urdu Jeh [0x06A9, 0x06A9], // Urdu Keheh [0x06AF, 0x06AF], // Urdu Gaf [0x06BA, 0x06BA], // Urdu Noon Ghunna [0x06BE, 0x06BE], // Urdu Heh Doachashmee [0x06C1, 0x06C1], // Urdu Heh Goal [0x06D2, 0x06D2], // Urdu Yeh Barree [0x06D3, 0x06D3] // Urdu Yeh Barree with Hamza ]; // Strong RTL characters (Arabic, Persian, Urdu) var RTL_CHARS = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u200C-\u200D]/; // Strong LTR characters (Latin, etc.) var LTR_CHARS = /[A-Za-z\u00C0-\u024F\u1E00-\u1EFF]/; /** * Check if a character is in Arabic script (includes Persian and Urdu) * @param {string} char - Single character to check * @returns {boolean} - True if character is Arabic/Persian/Urdu */ function isArabicChar(char) { var code = char.charCodeAt(0); return ARABIC_RANGE.some(function (range) { return code >= range[0] && code <= range[1]; }); } /** * Check if a character is in Persian (Farsi) script * @param {string} char - Single character to check * @returns {boolean} - True if character is Persian */ function isPersianChar(char) { var code = char.charCodeAt(0); return PERSIAN_RANGE.some(function (range) { return code >= range[0] && code <= range[1]; }) || isArabicChar(char); // Persian uses Arabic base + extensions } /** * Check if a character is in Urdu script * @param {string} char - Single character to check * @returns {boolean} - True if character is Urdu */ function isUrduChar(char) { var code = char.charCodeAt(0); return URDU_RANGE.some(function (range) { return code >= range[0] && code <= range[1]; }) || isArabicChar(char); // Urdu uses Arabic base + extensions } /** * Check if a character requires RTL rendering * @param {string} char - Single character to check * @returns {boolean} - True if character requires RTL */ function isRTLChar(char) { return RTL_CHARS.test(char); } /** * Check if a character is strongly LTR * @param {string} char - Single character to check * @returns {boolean} - True if character is strongly LTR */ function isLTRChar(char) { return LTR_CHARS.test(char); } /** * Determine the predominant text direction of a string * @param {string} text - Text to analyze * @returns {string} - 'rtl', 'ltr', or 'neutral' */ function getTextDirection(text) { if (!text || typeof text !== 'string') { return 'neutral'; } var rtlCount = 0; var ltrCount = 0; for (var i = 0; i < text.length; i++) { var char = text.charAt(i); if (isRTLChar(char)) { rtlCount++; } else if (isLTRChar(char)) { ltrCount++; } } // If we have any strong directional characters if (rtlCount > 0 || ltrCount > 0) { if (rtlCount > ltrCount) { return 'rtl'; } else if (ltrCount > rtlCount) { return 'ltr'; } else { // Equal counts - slight preference for RTL if both exist return rtlCount > 0 ? 'rtl' : 'ltr'; } } return 'neutral'; } function getNormalizedChars(fixed) { return fixed.map(char => { if (char === '(') return ')'; if (char === ')') return '('; if (char === '[') return ']'; if (char === ']') return '['; return char; }); } /** * Check if text contains any RTL characters * @param {string} text - Text to check * @returns {boolean} - True if text contains RTL characters */ function containsRTL(text) { if (!text || typeof text !== 'string') { return false; } return RTL_CHARS.test(text); } /** * Check if text is primarily Arabic, Persian, or Urdu * @param {string} text - Text to check * @returns {boolean} - True if text is primarily Arabic/Persian/Urdu */ function isArabicText(text) { if (!text || typeof text !== 'string') { return false; } var rtlCount = 0; var totalStrongChars = 0; for (var i = 0; i < text.length; i++) { var char = text.charAt(i); if (isArabicChar(char) || isPersianChar(char) || isUrduChar(char)) { rtlCount++; totalStrongChars++; } else if (isRTLChar(char) || isLTRChar(char)) { totalStrongChars++; } } // If we have any strong characters and RTL represents at least 30% // (lowered threshold for mixed text) return totalStrongChars > 0 && (rtlCount / totalStrongChars) >= 0.3; } /** * Process RTL text for proper display * For modern PDF libraries, we rely on the underlying engine for BiDi processing * We should NOT reverse word order manually - that breaks Arabic text * @param {string} text - Text to process * @returns {string} - Text (unchanged for proper BiDi handling) */ function reverseRTLText(text) { if (!text || typeof text !== 'string') { return text; } // DO NOT reverse Arabic text word order! // Arabic text should maintain its natural word order // Only the display direction (alignment) should be RTL // The PDF engine handles proper BiDi rendering return text; } /** * Apply RTL processing to text if needed * @param {string} text - Original text * @param {string} direction - Explicit direction override ('rtl', 'ltr', or null) * @returns {Object} - { text: processedText, isRTL: boolean } */ function processRTLText(text, direction) { if (!text || typeof text !== 'string' || getTextDirection(text) !== 'rtl') { return { text: text, isRTL: false }; } var isRTL = false; if (direction === 'rtl') { isRTL = true; } else if (direction === 'ltr') { isRTL = false; } else { // Auto-detect direction var textDir = getTextDirection(text); isRTL = textDir === 'rtl'; } // Keep original text - no word reversal needed // The PDF engine handles proper BiDi rendering return { text: text, isRTL: isRTL }; } /** * Reverse table row cells for RTL layout * @param {Array} row - Table row array * @returns {Array} - Reversed row array */ function reverseTableRow(row) { if (!Array.isArray(row)) { return row; } return row.slice().reverse(); } /** * Process table for RTL layout if supportRTL is enabled * @param {Object} tableNode - Table definition object * @returns {Object} - Processed table node */ function processRTLTable(tableNode) { if (!tableNode || !tableNode.table.supportRTL || !tableNode.table || !tableNode.table.body) { return tableNode; } // Don't clone the entire object - just modify the table data in place // Reverse each row in the table body for RTL layout tableNode.table.body = tableNode.table.body.map(function (row) { return reverseTableRow(row); }); // Also reverse the widths array if it exists if (tableNode.table.widths && Array.isArray(tableNode.table.widths)) { tableNode.table.widths = tableNode.table.widths.slice().reverse(); } return tableNode; } /** * Apply automatic RTL detection and formatting to any text element * @param {Object|string} element - Text element or string * @returns {Object} - Enhanced element with RTL properties */ function autoApplyRTL(element) { if (!element) return element; // Handle string elements if (typeof element === 'string') { var direction = getTextDirection(element); if (direction === 'rtl') { return { text: element, alignment: 'right', font: 'Nillima' // Use Arabic font for RTL text }; } return element; } // Handle object elements if (typeof element === 'object' && element.text) { var textDirection = getTextDirection(element.text); if (textDirection === 'rtl') { // Auto-apply RTL properties if not already set if (!element.alignment) { element.alignment = 'right'; } if (!element.font && isArabicText(element.text)) { element.font = 'Nillima'; } } else if (textDirection === 'ltr') { // Auto-apply LTR properties if not already set if (!element.alignment) { element.alignment = 'left'; } if (!element.font) { element.font = 'Roboto'; } } } return element; } /** * Process list items for RTL support including bullet positioning * @param {Array|Object} listItems - ul/ol content * @returns {Array|Object} - Processed list with RTL support */ function processRTLList(listItems) { if (!listItems) return listItems; function processListItem(item) { if (typeof item === 'string') { var direction = getTextDirection(item); if (direction === 'rtl') { return { text: item, alignment: 'right', font: 'Nillima', markerColor: '#2c5282' }; } return item; } if (typeof item === 'object') { // Process the main text if (item.text) { var textDirection = getTextDirection(item.text); if (textDirection === 'rtl') { if (!item.alignment) item.alignment = 'right'; if (!item.font && isArabicText(item.text)) item.font = 'Nillima'; if (!item.markerColor) item.markerColor = '#2c5282'; } } // Process nested ul/ol recursively if (item.ul) { item.ul = processRTLList(item.ul); } if (item.ol) { item.ol = processRTLList(item.ol); } } return item; } if (Array.isArray(listItems)) { return listItems.map(processListItem); } return processListItem(listItems); } /** * Process table for automatic RTL detection and layout * @param {Object} tableNode - Table definition object * @returns {Object} - Processed table node */ function processAutoRTLTable(tableNode) { if (!tableNode || !tableNode.table || !tableNode.table.body) { return tableNode; } // Check if table contains RTL content var hasRTLContent = false; var rtlCellCount = 0; var totalCells = 0; tableNode.table.body.forEach(function (row) { if (Array.isArray(row)) { row.forEach(function (cell) { totalCells++; var cellText = typeof cell === 'string' ? cell : (cell && cell.text ? cell.text : ''); if (containsRTL(cellText)) { rtlCellCount++; } }); } }); // If more than 30% of cells contain RTL content, treat as RTL table hasRTLContent = totalCells > 0 && (rtlCellCount / totalCells) >= 0.3; if (hasRTLContent || tableNode.table.supportRTL) { // Reverse table columns for RTL layout tableNode.table.body = tableNode.table.body.map(function (row) { return reverseTableRow(row); }); // Reverse widths if defined if (tableNode.table.widths && Array.isArray(tableNode.table.widths)) { tableNode.table.widths = tableNode.table.widths.slice().reverse(); } // Auto-apply RTL styles to cells tableNode.table.body = tableNode.table.body.map(function (row) { if (Array.isArray(row)) { return row.map(function (cell) { return autoApplyRTL(cell); }); } return row; }); } else { // For non-RTL tables, still auto-apply font and alignment per cell tableNode.table.body = tableNode.table.body.map(function (row) { if (Array.isArray(row)) { return row.map(function (cell) { return autoApplyRTL(cell); }); } return row; }); } return tableNode; } /** * Process any document element for automatic RTL detection * @param {Object|Array|string} element - Document element * @returns {Object|Array|string} - Processed element */ function processAutoRTLElement(element) { if (!element) return element; // Handle arrays (like content arrays) if (Array.isArray(element)) { return element.map(processAutoRTLElement); } // Handle text elements if (typeof element === 'string' || (element && element.text)) { element = autoApplyRTL(element); } // Handle tables if (element && element.table) { element = processAutoRTLTable(element); } // Handle lists if (element && element.ul) { element.ul = processRTLList(element.ul); } if (element && element.ol) { element.ol = processRTLList(element.ol); } // Handle columns if (element && element.columns && Array.isArray(element.columns)) { element.columns = element.columns.map(processAutoRTLElement); } return element; } function fixArabicTextUsingReplace(text) { //if start with point remove if (text.startsWith('.')) { text = text.slice(1); } text = text .replace(/\(/g, 'TEMP_OPEN_PAREN') .replace(/\)/g, '(') .replace(/TEMP_OPEN_PAREN/g, ')') .replace(/\[/g, 'TEMP_OPEN_SQUARE') .replace(/\]/g, '[') .replace(/TEMP_OPEN_SQUARE/g, ']') .replace(/\{/g, 'TEMP_OPEN_CURLY') .replace(/\}/g, '{') .replace(/TEMP_OPEN_CURLY/g, '}'); return text; } /* * Reverse a table row while preserving colSpan group semantics. * This function correctly handles colSpan by keeping the span cell at the start * of its group after reversal, maintaining proper header alignment. * * @param { Array } row - The original row array(cells may include colSpan / rowSpan). * @returns { Array } - A new row array reversed for RTL with correct colSpan placement. */ function reverseTableRowPreserveSpans(row) { if (!Array.isArray(row)) return row; var n = row.length; if (n === 0) return row; // For simple reversal that maintains colSpan structure: // Just reverse the array, but this works better with how pdfmake handles spans return row.slice().reverse(); } module.exports = { isArabicChar: isArabicChar, isPersianChar: isPersianChar, isUrduChar: isUrduChar, isRTLChar: isRTLChar, isLTRChar: isLTRChar, getTextDirection: getTextDirection, containsRTL: containsRTL, isArabicText: isArabicText, reverseRTLText: reverseRTLText, processRTLText: processRTLText, reverseTableRow: reverseTableRow, processRTLTable: processRTLTable, autoApplyRTL: autoApplyRTL, processRTLList: processRTLList, reverseTableRowPreserveSpans: reverseTableRowPreserveSpans, processAutoRTLTable: processAutoRTLTable, processAutoRTLElement: processAutoRTLElement, fixArabicTextUsingReplace: fixArabicTextUsingReplace, getNormalizedChars: getNormalizedChars };