UNPKG

chrono-node

Version:

A natural language date parser in JavaScript

194 lines (175 loc) 4.69 kB
import { OpUnitType, QUnitType } from "dayjs";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
import { findMostLikelyADYear } from "../../calculation/years";
import { TimeUnits } from "../../utils/timeunits";

/**
 * German weekday names and their two-letter abbreviations (lower-case),
 * mapped to a weekday index where Sunday is 0 and Saturday is 6.
 */
export const WEEKDAY_DICTIONARY: { [word: string]: number } = {
    "sonntag": 0,
    "so": 0,
    "montag": 1,
    "mo": 1,
    "dienstag": 2,
    "di": 2,
    "mittwoch": 3,
    "mi": 3,
    "donnerstag": 4,
    "do": 4,
    "freitag": 5,
    "fr": 5,
    "samstag": 6,
    "sa": 6,
};

/**
 * German month names (lower-case) mapped to a 1-based month number.
 * Covers full names, spelling variants (e.g. "jänner", "feber", "maerz")
 * and abbreviations with and without a trailing dot.
 */
export const MONTH_DICTIONARY: { [word: string]: number } = {
    "januar": 1,
    "jänner": 1,
    "janner": 1,
    "jan": 1,
    "jan.": 1,
    "februar": 2,
    "feber": 2,
    "feb": 2,
    "feb.": 2,
    "märz": 3,
    "maerz": 3,
    "mär": 3,
    "mär.": 3,
    "mrz": 3,
    "mrz.": 3,
    "april": 4,
    "apr": 4,
    "apr.": 4,
    "mai": 5,
    "juni": 6,
    "jun": 6,
    "jun.": 6,
    "juli": 7,
    "jul": 7,
    "jul.": 7,
    "august": 8,
    "aug": 8,
    "aug.": 8,
    "september": 9,
    "sep": 9,
    "sep.": 9,
    "sept": 9,
    "sept.": 9,
    "oktober": 10,
    "okt": 10,
    "okt.": 10,
    "november": 11,
    "nov": 11,
    "nov.": 11,
    "dezember": 12,
    "dez": 12,
    "dez.": 12,
};

/**
 * German number words for 1 through 12 (lower-case), including inflected
 * forms of "ein" and ASCII transliterations of umlauts.
 */
export const INTEGER_WORD_DICTIONARY: { [word: string]: number } = {
    "eins": 1,
    "eine": 1,
    "einem": 1,
    "einen": 1,
    "einer": 1,
    "zwei": 2,
    "drei": 3,
    "vier": 4,
    "fünf": 5,
    "fuenf": 5,
    "sechs": 6,
    "sieben": 7,
    "acht": 8,
    "neun": 9,
    "zehn": 10,
    "elf": 11,
    "zwölf": 12,
    "zwoelf": 12,
};

/**
 * German time-unit words and abbreviations (lower-case) mapped to the
 * dayjs unit identifier they stand for.
 */
export const TIME_UNIT_DICTIONARY: { [word: string]: OpUnitType | QUnitType } = {
    sek: "second",
    sekunde: "second",
    sekunden: "second",
    min: "minute",
    minute: "minute",
    minuten: "minute",
    h: "hour",
    std: "hour",
    stunde: "hour",
    stunden: "hour",
    tag: "d",
    tage: "d",
    tagen: "d",
    woche: "week",
    wochen: "week",
    monat: "month",
    monate: "month",
    monaten: "month",
    monats: "month",
    quartal: "quarter",
    quartals: "quarter",
    quartale: "quarter",
    quartalen: "quarter",
    a: "year",
    j: "year",
    jr: "year",
    jahr: "year",
    jahre: "year",
    jahren: "year",
    jahres: "year",
};

//-----------------------------

/**
 * Regex source (non-capturing) for a quantity: a number word from
 * INTEGER_WORD_DICTIONARY, an integer or decimal literal, or one of the
 * vague quantifiers "halb(e)", "einige(n)", "wenige(n)", "mehrere(n)".
 *
 * NOTE(review): `halb?` also admits the truncated token "hal", for which
 * parseNumberPattern returns NaN — confirm whether `halb` was intended.
 */
export const NUMBER_PATTERN = `(?:${matchAnyPattern(
    INTEGER_WORD_DICTIONARY
)}|[0-9]+|[0-9]+\\.[0-9]+|halb?|halbe?|einigen?|wenigen?|mehreren?)`;

/**
 * Converts text matched by NUMBER_PATTERN into a number.
 *
 * Lookup is case-insensitive. Number words resolve through
 * INTEGER_WORD_DICTIONARY; "ein" resolves to 1; vague quantifiers resolve to
 * fixed estimates (wenige → 2, halb → 0.5, einige → 3, mehrere → 7);
 * anything else is handed to parseFloat.
 *
 * @param match Text of a single quantity token.
 * @returns The numeric value, or NaN when the text is not a recognised quantity.
 */
export function parseNumberPattern(match: string): number {
    const num = match.toLowerCase();

    const wordValue = INTEGER_WORD_DICTIONARY[num];
    if (wordValue !== undefined) {
        return wordValue;
    }

    // The inflected forms (eine/einem/einen/einer) are dictionary keys and were
    // resolved above; only the bare article is missing from the dictionary.
    if (num === "ein") {
        return 1;
    }

    // Test for the stems rather than the "-n" forms: NUMBER_PATTERN makes the
    // trailing "n" optional, so "wenige", "einige" and "mehrere" reach this
    // function too and previously fell through to parseFloat (NaN).
    if (/wenige/.test(num)) {
        return 2;
    }
    if (/halb/.test(num)) {
        return 0.5;
    }
    if (/einige/.test(num)) {
        return 3;
    }
    if (/mehrere/.test(num)) {
        return 7;
    }

    return parseFloat(num);
}

//-----------------------------

/**
 * Regex source for a year of one to four digits, optionally followed by an
 * era marker such as "v. Chr.", "n. Chr.", "v. u. Z." or "u. Z." / "d. g. Z.".
 */
export const YEAR_PATTERN = `(?:[0-9]{1,4}(?:\\s*[vn]\\.?\\s*(?:C(?:hr)?|(?:u\\.?|d\\.?(?:\\s*g\\.?)?)?\\s*Z)\\.?|\\s*(?:u\\.?|d\\.?(?:\\s*g\\.)?)\\s*Z\\.?)?)`;

/**
 * Converts text matched by YEAR_PATTERN into a year number.
 *
 * @param match Year text, optionally carrying an era marker.
 * @returns A negative year when the text contains a "v" marker, the literal
 *   year when it contains an "n" or "z" marker, and otherwise whatever
 *   findMostLikelyADYear returns for the parsed number.
 */
export function parseYear(match: string): number {
    // The "v" test must come first: "v. u. Z." also contains a "z".
    if (/v/i.test(match)) {
        // v.Chr.
        return -parseInt(match.replace(/[^0-9]+/gi, ""), 10);
    }

    if (/[nz]/i.test(match)) {
        // n.Chr., also written as "uZ" or "dgZ"
        return parseInt(match.replace(/[^0-9]+/gi, ""), 10);
    }

    const rawYearNumber = parseInt(match, 10);
    return findMostLikelyADYear(rawYearNumber);
}

//-----------------------------

// One "<quantity> <unit>" pair; group 1 is the quantity, group 2 the unit word.
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

/** Regex source built by repeatedTimeunitPattern from the single-pair pattern with an empty prefix. */
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN);

/**
 * Extracts every "<quantity> <unit>" pair from a text and collects them into
 * a unit → amount map. If a unit occurs more than once, the last occurrence wins.
 *
 * @param timeunitText Text containing one or more quantity/unit pairs.
 * @returns The collected amounts keyed by unit; empty when nothing matches.
 */
export function parseTimeUnits(timeunitText: string): TimeUnits {
    const fragments: TimeUnits = {};
    let remainingText = timeunitText;
    let match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    while (match) {
        collectDateTimeFragment(fragments, match);
        // The regex is not anchored, so the match may begin after a separator
        // (e.g. ", "). Advance to the END of the match; advancing by its length
        // alone re-scans the tail of the match and can overwrite the value just
        // read ("1 tag, 12 h" would re-match "2 h").
        remainingText = remainingText.substring(match.index + match[0].length);
        match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    }
    return fragments;
}

/** Stores the amount (group 1) under the unit that group 2 names in TIME_UNIT_DICTIONARY. */
function collectDateTimeFragment(fragments: TimeUnits, match: RegExpExecArray): void {
    const num = parseNumberPattern(match[1]);
    const unit = TIME_UNIT_DICTIONARY[match[2].toLowerCase()];
    fragments[unit] = num;
}