UNPKG

chrono-node

Version:

A natural language date parser in Javascript

185 lines (165 loc) 4.51 kB
import { OpUnitType, QUnitType } from "dayjs"; import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern"; export const WEEKDAY_DICTIONARY: { [word: string]: number } = { "dimanche": 0, "dim": 0, "lundi": 1, "lun": 1, "mardi": 2, "mar": 2, "mercredi": 3, "mer": 3, "jeudi": 4, "jeu": 4, "vendredi": 5, "ven": 5, "samedi": 6, "sam": 6, }; export const MONTH_DICTIONARY: { [word: string]: number } = { "janvier": 1, "jan": 1, "jan.": 1, "février": 2, "fév": 2, "fév.": 2, "fevrier": 2, "fev": 2, "fev.": 2, "mars": 3, "mar": 3, "mar.": 3, "avril": 4, "avr": 4, "avr.": 4, "mai": 5, "juin": 6, "jun": 6, "juillet": 7, "juil": 7, "jul": 7, "jul.": 7, "août": 8, "aout": 8, "septembre": 9, "sep": 9, "sep.": 9, "sept": 9, "sept.": 9, "octobre": 10, "oct": 10, "oct.": 10, "novembre": 11, "nov": 11, "nov.": 11, "décembre": 12, "decembre": 12, "dec": 12, "dec.": 12, }; export const INTEGER_WORD_DICTIONARY: { [word: string]: number } = { "un": 1, "deux": 2, "trois": 3, "quatre": 4, "cinq": 5, "six": 6, "sept": 7, "huit": 8, "neuf": 9, "dix": 10, "onze": 11, "douze": 12, "treize": 13, }; export const TIME_UNIT_DICTIONARY: { [word: string]: OpUnitType | QUnitType } = { "sec": "second", "seconde": "second", "secondes": "second", "min": "minute", "mins": "minute", "minute": "minute", "minutes": "minute", "h": "hour", "hr": "hour", "hrs": "hour", "heure": "hour", "heures": "hour", "jour": "d", "jours": "d", "semaine": "week", "semaines": "week", "mois": "month", "trimestre": "quarter", "trimestres": "quarter", "ans": "year", "année": "year", "années": "year", }; //----------------------------- export const NUMBER_PATTERN = `(?:${matchAnyPattern( INTEGER_WORD_DICTIONARY )}|[0-9]+|[0-9]+\\.[0-9]+|une?\\b|quelques?|demi-?)`; export function parseNumberPattern(match: string): number { const num = match.toLowerCase(); if (INTEGER_WORD_DICTIONARY[num] !== undefined) { return INTEGER_WORD_DICTIONARY[num]; } else if (num === "une" || num === "un") { return 1; } else if (num.match(/quelques?/)) { return 3; } else if (num.match(/demi-?/)) { return 0.5; } return parseFloat(num); } //----------------------------- export const ORDINAL_NUMBER_PATTERN = `(?:[0-9]{1,2}(?:er)?)`; export function parseOrdinalNumberPattern(match: string): number { let num = match.toLowerCase(); num = num.replace(/(?:er)$/i, ""); return parseInt(num); } //----------------------------- // 88 p. Chr. n. // 234 AC export const YEAR_PATTERN = `(?:[1-9][0-9]{0,3}\\s*(?:AC|AD|p\\.\\s*C(?:hr?)?\\.\\s*n\\.)|[1-2][0-9]{3}|[5-9][0-9])`; export function parseYear(match: string): number { if (/AC/i.test(match)) { match = match.replace(/BC/i, ""); return -parseInt(match); } if (/AD/i.test(match) || /C/i.test(match)) { match = match.replace(/[^\d]+/i, ""); return parseInt(match); } let yearNumber = parseInt(match); if (yearNumber < 100) { if (yearNumber > 50) { yearNumber = yearNumber + 1900; } else { yearNumber = yearNumber + 2000; } } return yearNumber; } //----------------------------- const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`; const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i"); export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN); export function parseTimeUnits(timeunitText): { [c in OpUnitType | QUnitType]?: number } { const fragments = {}; let remainingText = timeunitText; let match = SINGLE_TIME_UNIT_REGEX.exec(remainingText); while (match) { collectDateTimeFragment(fragments, match); remainingText = remainingText.substring(match[0].length); match = SINGLE_TIME_UNIT_REGEX.exec(remainingText); } return fragments as { [c in OpUnitType | QUnitType]?: number }; } function collectDateTimeFragment(fragments, match) { const num = parseNumberPattern(match[1]); const unit = TIME_UNIT_DICTIONARY[match[2].toLowerCase()]; fragments[unit] = num; }