/*
 * persian-normalizer
 * Version:
 * Normalize Persian Text
 * 74 lines (73 loc) • 2.66 kB
 * JavaScript
 */
;
/**
 * Module-interop helper (has the shape of the compiler-emitted `__createBinding`).
 * Exposes property `k` of module object `m` on object `o` under the name `k2`
 * (`k2` defaults to `k`). An already-present `this.__createBinding` is reused
 * instead of defining a new one.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with an enumerable live getter when: `m` has no own
// property `k`; or the property is an accessor and `m` is not flagged
// `__esModule`; or it is a data property that is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when `Object.create` is unavailable: copy the current value once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
/**
 * Module-interop helper: sets `o["default"]` to `v`.
 * Uses `Object.defineProperty` with an enumerable data descriptor when
 * `Object.create` exists, otherwise a plain assignment. An already-present
 * `this.__setModuleDefault` is reused instead of defining a new one.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
/**
 * Module-interop helper: builds a namespace-style object for a required module.
 * A truthy `mod` flagged `__esModule` is returned unchanged. Otherwise a new
 * object is created, every own enumerable key of `mod` except "default" is
 * bound onto it via `__createBinding`, and `mod` itself is stored as the
 * object's `default`. An already-present `this.__importStar` is reused.
 */
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// `!= null` skips both null and undefined; inherited keys are filtered out.
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
// Flag this CommonJS module's exports object as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare both exports as undefined; the real functions are assigned further down.
exports.extractNumbers = exports.persianToEnglishNumber = void 0;
// JSON lookup table, indexed by character code in extractNumbers; wrapped by
// __importStar so the raw JSON object is also reachable as `.default`.
var numbersMaping = __importStar(require("../utf8-codes/number-maping.json"));
// Provides removeSpaces, which extractNumbers applies to its result.
var spaces_1 = require("../spaces");
/**
 * Convert Arabic-Indic digits (U+0660–U+0669) and Persian digits
 * (U+06F0–U+06F9) in a string to their ASCII equivalents ("0"–"9").
 * Every other character is left untouched.
 *
 * @param {String} text - Text that may contain Persian or Arabic digits.
 * @returns {String|undefined} The converted text, or `undefined` when `text`
 * is falsy (empty string, null, undefined).
 */
var persianToEnglishNumber = function (text) {
    if (!text)
        return;
    return text.replace(/[\u0660-\u0669\u06f0-\u06f9]/g, function (c) {
        var code = c.charCodeAt(0);
        // Each digit range has its own zero; subtracting the Arabic zero from a
        // Persian digit would yield values such as "144" instead of "0".
        var zero = code >= 0x06f0 ? 0x06f0 : 0x0660;
        return (code - zero).toString();
    });
};
exports.persianToEnglishNumber = persianToEnglishNumber;
/**
 * Keep only the characters of `text` that have an entry in the number
 * mapping, replacing each with its mapped value, then run the result
 * through `removeSpaces`.
 *
 * Lookup is by UTF-16 character code into the JSON mapping. According to the
 * original note, each mapping lists the English code first, the Persian code
 * second and the Arabic code third (not verifiable from this file).
 *
 * @param {String} text - Input text; it is converted with `toString()` first.
 * @returns {String|undefined} The extracted digits, or `undefined` when
 * `text` is falsy.
 */
var extractNumbers = function (text) {
    if (!text) {
        return;
    }
    var chars = text.toString().split("");
    var kept = [];
    for (var i = 0; i < chars.length; i++) {
        var mapped = numbersMaping[chars[i].charCodeAt(0)];
        // Characters with no mapping entry contribute nothing to the output.
        kept.push(typeof mapped === "undefined" ? "" : mapped);
    }
    return (0, spaces_1.removeSpaces)(kept.join(""));
};
exports.extractNumbers = extractNumbers;