UNPKG

node-parabaik

Version:
459 lines (364 loc) 15.2 kB
// Parabaik Myanmar Text Converter (Zawgyi <> Unicode)
// Copyright (C) 2014 Ngwe TUN (Solveware Solution)

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

// Both converters are ordered lists of [pattern, replacement] rules.
// ORDER MATTERS: later rules rely on the code points produced (or removed)
// by earlier ones, so never sort, merge or reorder entries.
// Every pattern carries the `g` flag; String.prototype.replace resets
// lastIndex for global patterns, so sharing the literals across calls is safe.

var UNICODE_TO_ZAWGYI_RULES = [
  [/\u104E\u1004\u103A\u1038/g, '\u104E'],
  [/\u102B\u103A/g, '\u105A'],
  [/\u102D\u1036/g, '\u108E'],
  [/\u103F/g, '\u1086'],

  // aukmyint (U+1037) after specific signs becomes U+1094 / U+1095
  [/(\u102F[\u1036]?)\u1037/g, '$1\u1094'],
  [/(\u1030[\u1036]?)\u1037/g, '$1\u1094'],
  [/(\u1014[\u103A\u1032]?)\u1037/g, '$1\u1094'],
  [/(\u103B[\u1032\u1036]?)\u1037/g, '$1\u1095'],
  [/(\u103D[\u1032]?)\u1037/g, '$1\u1095'],

  // u - 2: U+102F after medials, stacked or selected consonants becomes U+1033
  [/([\u103B\u103C\u103D][\u102D\u1036]?)\u102F/g, '$1\u1033'],
  [/((\u1039[\u1000-\u1021])[\u102D\u1036]?)\u102F/g, '$1\u1033'],
  [/([\u100A\u100C\u1020\u1025\u1029][\u102D\u1036]?)\u102F/g, '$1\u1033'],

  // uu - 2: same contexts, U+1030 becomes U+1034
  [/([\u103B\u103C][\u103D]?[\u103E]?[\u102D\u1036]?)\u1030/g, '$1\u1034'],
  [/((\u1039[\u1000-\u1021])[\u102D\u1036]?)\u1030/g, '$1\u1034'],
  [/([\u100A\u100C\u1020\u1025\u1029][\u102D\u1036]?)\u1030/g, '$1\u1034'],

  [/(\u103C)\u103E/g, '$1\u1087'], // ha - 2

  [/\u1009(?=[\u103A])/g, '\u1025'],
  [/\u1009(?=\u1039[\u1000-\u1021])/g, '\u1025'],

  // E render: move U+1031 in front of its consonant cluster
  [/([\u1000-\u1021\u1029])(\u1039[\u1000-\u1021])?([\u103B-\u103E\u1087]*)?\u1031/g, '\u1031$1$2$3'],

  // Ra render: move U+103C in front of its consonant cluster
  [/([\u1000-\u1021\u1029])(\u1039[\u1000-\u1021\u1000-\u1021])?(\u103C)/g, '$3$1$2'],

  // Kinzi: collapse to U+1064, then move it behind the following consonant
  // NOTE(review): three of the classes below also accept a literal space;
  // kept exactly as found - confirm whether that is intended.
  [/\u1004\u103A\u1039/g, '\u1064'],
  [/(\u1064)([\u1031]?)([\u103C]?)([\u1000-\u1021])\u102D/g, '$2$3$4\u108B'], // reordering kinzi lgt
  [/(\u1064)(\u1031)?(\u103C)?([ \u1000-\u1021])\u102E/g, '$2$3$4\u108C'], // reordering kinzi lgtsk
  [/(\u1064)(\u1031)?(\u103C)?([ \u1000-\u1021])\u1036/g, '$2$3$4\u108D'], // reordering kinzi ttt
  [/(\u1064)(\u1031)?(\u103C)?([ \u1000-\u1021])/g, '$2$3$4\u1064'], // reordering kinzi

  // Consonant variants
  [/\u100A(?=[\u1039\u102F\u1030])/g, '\u106B'], // nnya - 2
  [/\u101B(?=[\u102F\u1030])/g, '\u1090'], // ra - 2
  [/\u1014(?=[\u1039\u103D\u103E\u102F\u1030])/g, '\u108F'], // na - 2

  // Stacked consonants
  [/\u1039\u1000/g, '\u1060'],
  [/\u1039\u1001/g, '\u1061'],
  [/\u1039\u1002/g, '\u1062'],
  [/\u1039\u1003/g, '\u1063'],
  [/\u1039\u1005/g, '\u1065'],
  [/\u1039\u1006/g, '\u1066'],
  [/([\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108F\u1015\u1016\u1017\u1019\u101D])\u1066/g, '$1\u1067'], // 1067
  [/\u1039\u1007/g, '\u1068'],
  [/\u1039\u1008/g, '\u1069'],
  [/\u1039\u100F/g, '\u1070'],
  [/\u1039\u1010/g, '\u1071'],
  [/([\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108F\u1015\u1016\u1017\u1019\u101D])\u1071/g, '$1\u1072'], // 1072 (little shift to right)
  [/\u1039\u1011/g, '\u1073'],
  [/([\u1001\u1002\u1004\u1005\u1007\u1012\u1013\u108F\u1015\u1016\u1017\u1019\u101D])\u1073/g, '$1\u1074'], // 1074 (little shift to right)
  [/\u1039\u1012/g, '\u1075'],
  [/\u1039\u1013/g, '\u1076'],
  [/\u1039\u1014/g, '\u1077'],
  [/\u1039\u1015/g, '\u1078'],
  [/\u1039\u1016/g, '\u1079'],
  [/\u1039\u1017/g, '\u107A'],
  [/\u1039\u1018/g, '\u107B'],
  [/\u1039\u1019/g, '\u107C'],
  [/\u1039\u101C/g, '\u1085'],
  [/\u100F\u1039\u100D/g, '\u1091'],
  [/\u100B\u1039\u100C/g, '\u1092'],
  [/\u1039\u100C/g, '\u106D'],
  [/\u100B\u1039\u100B/g, '\u1097'],
  [/\u1039\u100B/g, '\u106C'],
  [/\u100E\u1039\u100D/g, '\u106F'],
  [/\u100D\u1039\u100D/g, '\u106E'],

  // Repeated on purpose: the E render above can move U+1031 out from between
  // U+1009 and U+103A, creating a new match for this rule.
  [/\u1009(?=\u103A)/g, '\u1025'],
  [/\u1025(?=[\u1039\u102F\u1030])/g, '\u106A'], // u - 2

  // Asat and medials shift down by one code point; asat must go first so
  // that U+103A is free before ya is moved onto it.
  [/\u103A/g, '\u1039'], // asat
  [/\u103B\u103D\u103E/g, '\u107D\u108A'], // ya wa ha
  [/\u103D\u103E/g, '\u108A'], // wa ha
  [/\u103B/g, '\u103A'], // ya
  [/\u103C/g, '\u103B'], // ra
  [/\u103D/g, '\u103C'], // wa
  [/\u103E/g, '\u103D'], // ha
  [/\u103A(?=[\u103C\u103D\u108A])/g, '\u107D'], // ya - 2
  [/(\u100A(?:[\u102D\u102E\u1036\u108B\u108C\u108D\u108E])?)\u103D/g, '$1\u1087'], // ha - 2

  [/\u103B(?=[\u1000\u1003\u1006\u100F\u1010\u1011\u1018\u101A\u101C\u101E\u101F\u1021])/g, '\u107E'], // great Ra with wide consonants
  [/\u107E([\u1000-\u1021\u108F])(?=[\u102D\u102E\u1036\u108B\u108C\u108D\u108E])/g, '\u1080$1'], // great Ra with upper sign
  [/\u107E([\u1000-\u1021\u108F])(?=[\u103C\u108A])/g, '\u1082$1'], // great Ra with under signs
  // The lookahead class used to contain literal spaces, so a cluster followed
  // by a plain space was treated as if it carried an upper sign. It now
  // mirrors the "great Ra with upper sign" rule above.
  [/\u103B([\u1000-\u1021\u108F])(?=[\u102D\u102E\u1036\u108B\u108C\u108D\u108E])/g, '\u107F$1'], // little Ra with upper sign
  [/\u103B([\u1000-\u1021\u108F])(?=[\u103C\u108A])/g, '\u1081$1'], // little Ra with under signs

  // aukmyint, second pass (after the medial shift above)
  [/(\u1014[\u103A\u1032]?)\u1037/g, '$1\u1094'],
  [/(\u1033[\u1036]?)\u1094/g, '$1\u1095'],
  [/(\u1034[\u1036]?)\u1094/g, '$1\u1095'],
  [/([\u103C\u103D\u108A][\u1032]?)\u1037/g, '$1\u1095']
];

var ZAWGYI_TO_UNICODE_RULES = [
  // Previously emitted ' \u1009', injecting a stray space into the output.
  [/\u106A/g, '\u1009'],
  [/\u1025(?=[\u1039\u102C])/g, '\u1009'],
  [/\u1025\u102E/g, '\u1026'],
  [/\u106B/g, '\u100A'],
  [/\u1090/g, '\u101B'],
  [/\u108F/g, '\u1014'],

  // Medials shift up by one code point; ha must go first so that each
  // target code point is free before the next medial is moved onto it.
  [/[\u103D\u1087]/g, '\u103E'], // ha
  [/\u103C/g, '\u103D'], // wa
  [/[\u103B\u107E\u107F\u1080\u1081\u1082\u1083\u1084]/g, '\u103C'], // ya yint (ra)
  [/[\u103A\u107D]/g, '\u103B'], // ya
  [/\u103E\u103B/g, '\u103B\u103E'], // reorder
  [/\u108A/g, '\u103D\u103E'],
  [/\u103E\u103D/g, '\u103D\u103E'], // wa ha

  // Reordering: kinzi moves in front of its consonant cluster
  [/(\u1031)?(\u103C)?([\u1000-\u1021])\u1064/g, '\u1064$1$2$3'], // reordering kinzi
  [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108B/g, '\u1064$1$2$3\u102D'], // reordering kinzi lgt
  [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108C/g, '\u1064$1$2$3\u102E'], // reordering kinzi lgtsk
  [/(\u1031)?(\u103C)?([\u1000-\u1021])\u108D/g, '\u1064$1$2$3\u1036'], // reordering kinzi ttt

  [/\u105A/g, '\u102B\u103A'],
  [/\u108E/g, '\u102D\u1036'], // lgt ttt
  [/\u1033/g, '\u102F'],
  [/\u1034/g, '\u1030'],
  [/\u1088/g, '\u103E\u102F'], // ha u
  [/\u1089/g, '\u103E\u1030'], // ha uu

  [/\u1039/g, '\u103A'],
  [/[\u1094\u1095]/g, '\u1037'], // aukmyint

  // Pasint order human error: vowel sign typed before the stacked consonant
  [/([\u1000-\u1021])([\u102C\u102D\u102E\u1032\u1036]){1,2}([\u1060\u1061\u1062\u1063\u1065\u1066\u1067\u1068\u1069\u1070\u1071\u1072\u1073\u1074\u1075\u1076\u1077\u1078\u1079\u107A\u107B\u107C\u1085])/g, '$1$3$2'],

  [/\u1064/g, '\u1004\u103A\u1039'],
  [/\u104E/g, '\u104E\u1004\u103A\u1038'],
  [/\u1086/g, '\u103F'],

  // Stacked consonants
  [/\u1060/g, '\u1039\u1000'],
  [/\u1061/g, '\u1039\u1001'],
  [/\u1062/g, '\u1039\u1002'],
  [/\u1063/g, '\u1039\u1003'],
  [/\u1065/g, '\u1039\u1005'],
  [/[\u1066\u1067]/g, '\u1039\u1006'],
  [/\u1068/g, '\u1039\u1007'],
  [/\u1069/g, '\u1039\u1008'],
  [/\u106C/g, '\u1039\u100B'],
  [/\u1070/g, '\u1039\u100F'],
  [/[\u1071\u1072]/g, '\u1039\u1010'],
  [/[\u1073\u1074]/g, '\u1039\u1011'],
  [/\u1075/g, '\u1039\u1012'],
  [/\u1076/g, '\u1039\u1013'],
  [/\u1077/g, '\u1039\u1014'],
  [/\u1078/g, '\u1039\u1015'],
  [/\u1079/g, '\u1039\u1016'],
  [/\u107A/g, '\u1039\u1017'],
  [/\u107B/g, '\u1039\u1018'],
  [/\u107C/g, '\u1039\u1019'],
  [/\u1085/g, '\u1039\u101C'],
  [/\u106D/g, '\u1039\u100C'],
  [/\u1091/g, '\u100F\u1039\u100D'],
  [/\u1092/g, '\u100B\u1039\u100C'],
  [/\u1097/g, '\u100B\u1039\u100B'],
  [/\u106F/g, '\u100E\u1039\u100D'],
  [/\u106E/g, '\u100D\u1039\u100D'],

  [/(\u103C)([\u1000-\u1021])(\u1039[\u1000-\u1021])?/g, '$2$3$1'], // reordering ra
  [/(\u103E)(\u103D)([\u103B\u103C])/g, '$3$2$1'],
  [/(\u103E)([\u103B\u103C])/g, '$2$1'],
  [/(\u103D)([\u103B\u103C])/g, '$2$1'],

  // zero and wa: a digit zero typed right after a letter/sign becomes U+101D
  [/(([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F]))(\u1040)(?=\u0020)?/g, '$1\u101D'],
  [/((\u101D))(\u1040)(?=\u0020)?/g, '$1\u101D'],

  // seven and ra: a digit seven next to a letter/sign or a space becomes U+101B
  [/(([\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020]))(\u1047)/g, '$1\u101B'],
  // The lookahead here had been mangled into "( ? = [\u1000 - \u101C ...])",
  // which never matched the intended text and, on input such as
  // "\u1047 = \u1000", matched and deleted everything after the seven.
  // NOTE(review): like the rule above, this also converts a seven that is
  // followed by a plain space.
  [/(\u1047)(?=[\u1000-\u101C\u101E-\u102A\u102C\u102E-\u103F\u104C-\u109F\u0020])/g, '\u101B'],

  // reordering storage order
  [/(\u1031)?([\u1000-\u1021])(\u1039[\u1000-\u1021])?([\u102D\u102E\u1032])?([\u1036\u1037\u1038]{0,2})([\u103B-\u103E]{0,3})([\u102F\u1030])?([\u1036\u1037\u1038]{0,2})([\u102D\u102E\u1032])?/g, '$2$3$6$1$4$9$7$5$8'],

  // Was a string pattern, which only swapped the first occurrence.
  [/\u1036\u102F/g, '\u102F\u1036'],
  [/(\u103A)(\u1037)/g, '$2$1'] // For Latest Myanmar3
];

/**
 * Runs every rule of `rules` over `text`, in order.
 *
 * @param {string} text - Text to convert.
 * @param {Array} rules - Ordered [RegExp, replacement string] pairs.
 * @returns {string} The text after all replacements have been applied.
 */
function applyRules(text, rules) {
  var result = text;
  for (var i = 0; i < rules.length; i++) {
    result = result.replace(rules[i][0], rules[i][1]);
  }
  return result;
}

/**
 * Converts Unicode-encoded Myanmar text to the Zawgyi encoding.
 *
 * @param {string} input - Unicode-encoded text.
 * @returns {string} The Zawgyi-encoded text.
 */
function unicode_to_zawgyi(input) {
  return applyRules(input, UNICODE_TO_ZAWGYI_RULES);
}

/**
 * Converts Zawgyi-encoded Myanmar text to Unicode.
 *
 * @param {string} input - Zawgyi-encoded text.
 * @returns {string} The Unicode-encoded text.
 */
function zawgyi_to_unicode(input) {
  return applyRules(input, ZAWGYI_TO_UNICODE_RULES);
}

// CommonJS export. The guard lets the file also be loaded as a plain script
// or ES module, where `exports` does not exist.
if (typeof exports !== 'undefined') {
  exports.unicode_to_zawgyi = unicode_to_zawgyi;
  exports.zawgyi_to_unicode = zawgyi_to_unicode;
}