UNPKG

numberify-converter

Version:

Convert numbers from text in various languages into their numeric equivalent.

github.com/moasko/numberify

moasko/numberify

131 lines (130 loc) • 5.05 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.numberifyString = void 0;
const en_1 = require("./en");
const fr_1 = require("./fr");

/**
 * Lower-cases a string and removes combining diacritical marks
 * (U+0300–U+036F) after NFD decomposition, e.g. "Zéro" -> "zero".
 *
 * @param {string} str - Text to normalize.
 * @returns {string} The lower-cased, accent-free text.
 */
function normalizeString(str) {
    return str
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "");
}

/**
 * Reads `key` from `dict`, considering own properties only.
 *
 * A plain `dict[key]` would also resolve inherited members such as
 * "constructor" or "toString", turning ordinary words into garbage values.
 *
 * @param {Object} dict - Dictionary object.
 * @param {string} key - Key to read.
 * @returns {*} The stored value, or undefined when `key` is not an own property.
 */
function lookupOwn(dict, key) {
    return Object.prototype.hasOwnProperty.call(dict, key) ? dict[key] : undefined;
}

/**
 * Resolves a token in a dictionary, trying its normalized form first and
 * its raw form second.
 *
 * @param {Object} dict - Dictionary object.
 * @param {string} token - Word to resolve.
 * @returns {*} The dictionary value, or undefined when neither form is present.
 */
function lookupToken(dict, token) {
    const normalizedHit = lookupOwn(dict, normalizeString(token));
    return normalizedHit !== undefined ? normalizedHit : lookupOwn(dict, token);
}

/**
 * Replaces every word that denotes a simple number with its decimal string.
 *
 * A word is resolved as a whole first; otherwise it is split on "-" and the
 * values of its parts are added together (e.g. two parts worth 20 and 1 give
 * "21"). Words that cannot be fully resolved are returned unchanged.
 *
 * @param {string[]} words - Words of the sentence.
 * @param {Object} dict - Dictionary of simple number words; values are
 *   presumably numbers (they are summed and stringified) — confirm against
 *   the language modules.
 * @returns {string[]} A new array with the same length as `words`.
 */
function processWordsToNumbers(words, dict) {
    return words.map((word) => {
        // Simple case: the whole word is in the dictionary.
        const wholeValue = lookupToken(dict, word);
        if (wholeValue !== undefined) {
            return wholeValue.toString();
        }

        // Hyphenated compounds: add up the value of each part.
        const parts = word.split("-");
        let sum = 0;
        let remaining = parts;

        // Special case for "quatre-vingt": the two leading parts mean
        // 4 * 20 = 80, not 4 + 20. Every part after them is added, so
        // four-part compounds are no longer truncated to 80.
        if (parts.length >= 2 &&
            normalizeString(parts[0]) === "quatre" &&
            normalizeString(parts[1]) === "vingt") {
            sum = 80;
            remaining = parts.slice(2);
        }

        for (const part of remaining) {
            const partValue = lookupToken(dict, part);
            if (partValue === undefined) {
                // One unknown part invalidates the compound: keep the word as is.
                return word;
            }
            sum += partValue;
        }
        return sum.toString();
    });
}

/**
 * Collapses runs of numeric tokens and multiplier words into single numbers.
 *
 * Numeric tokens are added to the current group. A multiplier below 1000
 * scales the current group; a multiplier of 1000 or more scales the group and
 * moves it into the running total. Any other token ends the run: the
 * accumulated number is emitted, followed by the token itself.
 *
 * @param {string[]} tokens - Tokens in which simple numbers are already digits.
 * @param {Object} multiplierDict - Dictionary of multiplier words and their factors.
 * @param {string} lang - Language code; "fr" enables handling of the joining word "et".
 * @returns {string[]} Tokens with each numeric run replaced by one number string.
 */
function applyMultipliers(tokens, multiplierDict, lang) {
    let total = 0;
    let current = 0;
    // True once the run in progress has consumed at least one numeric token.
    let pendingNumeric = false;
    // True while the current group holds a value of its own; reset after a
    // large multiplier so that a following bare multiplier counts as 1 instead
    // of multiplying a leftover 0.
    let groupHasValue = false;
    const result = [];

    // Whitespace-only tokens coerce to 0 with Number(), so rule them out first.
    const isNumeric = (token) => String(token).trim() !== "" && !Number.isNaN(Number(token));
    const extendsNumber = (token) => token !== undefined &&
        (isNumeric(token) || lookupToken(multiplierDict, token) !== undefined);
    const isFiller = (token) => lang === "fr" && normalizeString(token) === "et";

    const flush = () => {
        if (!pendingNumeric) {
            return;
        }
        result.push((total + current).toString());
        total = 0;
        current = 0;
        pendingNumeric = false;
        groupHasValue = false;
    };

    for (let i = 0; i < tokens.length; i++) {
        const token = tokens[i];

        // In French, "et" is dropped only when it joins two parts of a number;
        // anywhere else it is ordinary text and must stay in the output.
        if (pendingNumeric && isFiller(token) && extendsNumber(tokens[i + 1])) {
            continue;
        }

        if (isNumeric(token)) {
            current += Number(token);
            pendingNumeric = true;
            groupHasValue = true;
            continue;
        }

        const multiplier = lookupToken(multiplierDict, token);
        if (multiplier !== undefined) {
            if (!groupHasValue) {
                // A bare multiplier stands for one unit of itself.
                current = 1;
            }
            current *= multiplier;
            pendingNumeric = true;
            if (multiplier < 1000) {
                groupHasValue = true;
            } else {
                // Large multipliers close the group and feed the running total.
                total += current;
                current = 0;
                groupHasValue = false;
            }
            continue;
        }

        // Non-numeric token: emit the accumulated number, if any, then the token.
        flush();
        result.push(token);
    }

    // Emit a number that runs up to the end of the sentence.
    flush();
    return result;
}

/**
 * Converts number words in a sentence into digits.
 *
 * The sentence is split on single spaces and empty tokens are discarded, so
 * repeated spaces collapse to one in the output.
 *
 * @param {string} sentence - Input text.
 * @param {string} lang - "en" or "fr". Any other value yields an empty string.
 * @returns {string} The sentence with number words replaced by digits.
 */
function numberifyString(sentence, lang) {
    // Split on spaces and filter out any empty tokens.
    const words = sentence.split(" ").filter((word) => word.length > 0);
    let output = [];
    if (lang === "en") {
        output = processWordsToNumbers(words, en_1.simpleNumDict_EN);
        output = applyMultipliers(output, en_1.multiplierNumDict_EN, "en");
    } else if (lang === "fr") {
        output = processWordsToNumbers(words, fr_1.simpleNumDict_FR);
        output = applyMultipliers(output, fr_1.multiplierNumDict_FR, "fr");
    }
    return output.join(" ").trim();
}
exports.numberifyString = numberifyString;