numberify-converter
Version:
Converts numbers written out as words in various languages into their numeric equivalents.
131 lines (130 loc) • 5.05 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.numberifyString = void 0;
const en_1 = require("./en");
const fr_1 = require("./fr");
/**
 * Produces a comparison-friendly form of a word: lower-cased, with combining
 * diacritical marks removed (e.g. "Été" becomes "ete").
 *
 * @param {string} str - Text to normalize.
 * @returns {string} The lower-cased text without combining marks.
 */
function normalizeString(str) {
  const lowered = str.toLowerCase();
  // NFD splits an accented letter into its base letter plus combining marks,
  // so the marks can then be stripped on their own.
  const decomposed = lowered.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}
/**
 * Replaces each word that spells a simple number with that number's decimal
 * string; words that cannot be fully resolved are returned unchanged.
 *
 * Resolution order for each word:
 *   1. the whole word, looked up normalized first and then verbatim;
 *   2. a hyphenated compound, whose parts are looked up individually and
 *      summed (e.g. "twenty-one"). A leading "quatre-vingt(s)" counts as 80
 *      and any remaining parts are added to it (e.g. "quatre-vingt-dix-sept").
 *
 * @param {string[]} words - Words of the sentence.
 * @param {Object} dict - Maps number words to their numeric value.
 * @returns {string[]} One output token per input word.
 */
function processWordsToNumbers(words, dict) {
  // Only own properties count as dictionary entries; otherwise words such as
  // "constructor" or "toString" would resolve to inherited Object members.
  const ownEntry = (key) =>
    Object.prototype.hasOwnProperty.call(dict, key) ? dict[key] : undefined;

  // Looks a piece of text up by its normalized form, then verbatim.
  const valueOf = (text) => {
    const byNormalized = ownEntry(normalizeString(text));
    return byNormalized !== undefined ? byNormalized : ownEntry(text);
  };

  return words.map((word) => {
    // Simple case: the whole word is in the dictionary.
    const wholeValue = valueOf(word);
    if (wholeValue !== undefined) {
      return wholeValue.toString();
    }

    // Hyphenated compound words.
    const parts = word.split("-");
    let sum = 0;
    let remaining = parts;

    // Special handling for "quatre-vingt" (80 and beyond): the first two
    // parts multiply (4 * 20) instead of adding. "quatre-vingts" is the
    // standard spelling of exactly 80.
    const startsWithEighty =
      parts.length >= 2 &&
      normalizeString(parts[0]) === "quatre" &&
      /^vingts?$/.test(normalizeString(parts[1]));
    if (startsWithEighty) {
      sum = 80;
      remaining = parts.slice(2);
    }

    // Every remaining part must be a known number word; they are added up
    // (e.g. "dix" + "sept" after "quatre-vingt" gives 97).
    for (const part of remaining) {
      const partValue = valueOf(part);
      if (partValue === undefined) {
        return word;
      }
      sum += partValue;
    }
    return sum.toString();
  });
}
/**
 * Collapses runs of numeric tokens and multiplier words into single numbers.
 *
 * Numeric tokens are added to the current group. A multiplier below 1000
 * scales the current group in place; a multiplier of 1000 or more scales the
 * group and moves it into the running total. Any other token ends the run:
 * the accumulated value (if any) is emitted, followed by the token itself.
 *
 * @param {string[]} tokens - Tokens in sentence order; simple number words are
 *   expected to have been replaced by digit strings already.
 * @param {Object} multiplierDict - Maps multiplier words to their factor
 *   (assumed to be finite numbers).
 * @param {string} lang - Language code; "fr" enables handling of "et".
 * @returns {string[]} Tokens with each numeric run replaced by one number string.
 */
function applyMultipliers(tokens, multiplierDict, lang) {
  // Only own properties count as multipliers; otherwise a word such as
  // "constructor" resolves to an inherited Object member and yields "NaN".
  const ownEntry = (key) =>
    Object.prototype.hasOwnProperty.call(multiplierDict, key)
      ? multiplierDict[key]
      : undefined;

  // Resolves a token to its multiplier (normalized form first), or undefined.
  const multiplierOf = (token) => {
    const byNormalized = ownEntry(normalizeString(token));
    return byNormalized !== undefined ? byNormalized : ownEntry(token);
  };

  // Number("") and Number("\t") are 0, so whitespace-only tokens must be
  // excluded explicitly before trusting the conversion.
  const isNumeric = (token) =>
    String(token).trim() !== "" && !Number.isNaN(Number(token));

  // In French, "et" may join the parts of a number ("vingt et un").
  const isFiller = (token) => lang === "fr" && normalizeString(token) === "et";

  const continuesNumber = (token) =>
    token !== undefined &&
    (isNumeric(token) || multiplierOf(token) !== undefined);

  const result = [];
  let total = 0;
  let current = 0;
  let pendingNumeric = false; // true once the current run holds a numeric value

  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];

    // Skip "et" only when it sits inside a number; elsewhere it is an
    // ordinary conjunction and must stay in the sentence.
    if (isFiller(token) && pendingNumeric && continuesNumber(tokens[i + 1])) {
      continue;
    }

    if (isNumeric(token)) {
      pendingNumeric = true;
      current += Number(token);
      continue;
    }

    const multiplier = multiplierOf(token);
    if (multiplier !== undefined) {
      // A bare multiplier ("thousand") stands for one of itself.
      if (!pendingNumeric) {
        current = 1;
        pendingNumeric = true;
      }
      current *= multiplier;
      if (multiplier >= 1000) {
        // Large multipliers close the current group. pendingNumeric stays
        // true so the total is still flushed later.
        total += current;
        current = 0;
      }
      continue;
    }

    // Non-numeric token: flush the accumulated number, if any, then keep it.
    if (pendingNumeric) {
      result.push((total + current).toString());
      total = 0;
      current = 0;
      pendingNumeric = false;
    }
    result.push(token);
  }

  // Flush a number that runs to the end of the sentence.
  if (pendingNumeric) {
    result.push((total + current).toString());
  }
  return result;
}
/**
 * Rewrites a sentence so that numbers spelled out in words appear as digits.
 *
 * The sentence is split on single spaces (empty pieces are discarded), simple
 * number words are converted first, and multipliers are applied afterwards.
 * Only "en" and "fr" are handled; any other language code yields "".
 *
 * @param {string} sentence - Text to convert.
 * @param {string} lang - Language code, "en" or "fr".
 * @returns {string} The converted sentence, tokens joined by single spaces.
 */
function numberifyString(sentence, lang) {
  // Tokenize up front; consecutive spaces produce empty pieces that are dropped.
  const tokens = sentence.split(" ").filter((piece) => piece !== "");

  let converted;
  switch (lang) {
    case "en":
      converted = applyMultipliers(
        processWordsToNumbers(tokens, en_1.simpleNumDict_EN),
        en_1.multiplierNumDict_EN,
        "en"
      );
      break;
    case "fr":
      converted = applyMultipliers(
        processWordsToNumbers(tokens, fr_1.simpleNumDict_FR),
        fr_1.multiplierNumDict_FR,
        "fr"
      );
      break;
    default:
      // Unsupported language: nothing is emitted.
      converted = [];
  }
  return converted.join(" ").trim();
}
exports.numberifyString = numberifyString;