chrono-node
Version:
A natural language date parser in Javascript
258 lines • 6.48 kB
JavaScript
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern.js";
import { findMostLikelyADYear } from "../../calculation/years.js";
/**
 * Maps lower-case English weekday names and their abbreviations (with and
 * without a trailing period) to a weekday number: Sunday is 0, Saturday is 6.
 */
export const WEEKDAY_DICTIONARY = {
    "sunday": 0, "sun": 0, "sun.": 0,
    "monday": 1, "mon": 1, "mon.": 1,
    "tuesday": 2, "tue": 2, "tue.": 2,
    "wednesday": 3, "wed": 3, "wed.": 3,
    // Thursday is the only day with three abbreviation stems.
    "thursday": 4, "thurs": 4, "thurs.": 4, "thur": 4, "thur.": 4, "thu": 4, "thu.": 4,
    "friday": 5, "fri": 5, "fri.": 5,
    "saturday": 6, "sat": 6, "sat.": 6,
};
/**
 * Maps each full, lower-case English month name to its 1-based month number
 * (January is 1, December is 12).
 */
export const FULL_MONTH_NAME_DICTIONARY = {
    "january": 1, "february": 2, "march": 3,
    "april": 4, "may": 5, "june": 6,
    "july": 7, "august": 8, "september": 9,
    "october": 10, "november": 11, "december": 12,
};
/**
 * Maps lower-case English month names to their 1-based month number.
 *
 * Contains every entry of FULL_MONTH_NAME_DICTIONARY followed by the
 * abbreviated forms, each listed with and without a trailing period.
 * "may" has no separate abbreviation; September has both "sep" and "sept".
 */
export const MONTH_DICTIONARY = {
    ...FULL_MONTH_NAME_DICTIONARY,
    "jan": 1, "jan.": 1,
    "feb": 2, "feb.": 2,
    "mar": 3, "mar.": 3,
    "apr": 4, "apr.": 4,
    "jun": 6, "jun.": 6,
    "jul": 7, "jul.": 7,
    "aug": 8, "aug.": 8,
    "sep": 9, "sep.": 9, "sept": 9, "sept.": 9,
    "oct": 10, "oct.": 10,
    "nov": 11, "nov.": 11,
    "dec": 12, "dec.": 12,
};
/**
 * Maps the lower-case English number words "one" through "twelve" to their
 * integer values.
 */
export const INTEGER_WORD_DICTIONARY = {
    "one": 1, "two": 2, "three": 3, "four": 4,
    "five": 5, "six": 6, "seven": 7, "eight": 8,
    "nine": 9, "ten": 10, "eleven": 11, "twelve": 12,
};
/**
 * Maps lower-case English ordinal words ("first" .. "thirty-first") to their
 * integer values. Compound ordinals are listed in both their space-separated
 * and hyphenated spellings.
 */
export const ORDINAL_WORD_DICTIONARY = {
    "first": 1, "second": 2, "third": 3, "fourth": 4, "fifth": 5,
    "sixth": 6, "seventh": 7, "eighth": 8, "ninth": 9, "tenth": 10,
    "eleventh": 11, "twelfth": 12, "thirteenth": 13, "fourteenth": 14, "fifteenth": 15,
    "sixteenth": 16, "seventeenth": 17, "eighteenth": 18, "nineteenth": 19, "twentieth": 20,
    "twenty first": 21, "twenty-first": 21,
    "twenty second": 22, "twenty-second": 22,
    "twenty third": 23, "twenty-third": 23,
    "twenty fourth": 24, "twenty-fourth": 24,
    "twenty fifth": 25, "twenty-fifth": 25,
    "twenty sixth": 26, "twenty-sixth": 26,
    "twenty seventh": 27, "twenty-seventh": 27,
    "twenty eighth": 28, "twenty-eighth": 28,
    "twenty ninth": 29, "twenty-ninth": 29,
    "thirtieth": 30,
    "thirty first": 31, "thirty-first": 31,
};
/**
 * Maps fully spelled-out time-unit words (singular and plural) to the unit
 * key used for parsed fragments.
 *
 * Note that "day"/"days" map to the short key "d", whereas every other unit
 * maps to its full singular name.
 */
export const TIME_UNIT_DICTIONARY_NO_ABBR = {
    "second": "second", "seconds": "second",
    "minute": "minute", "minutes": "minute",
    "hour": "hour", "hours": "hour",
    "day": "d", "days": "d",
    "week": "week", "weeks": "week",
    "month": "month", "months": "month",
    "quarter": "quarter", "quarters": "quarter",
    "year": "year", "years": "year",
};
/**
 * Maps time-unit words, including abbreviations, to the unit key used for
 * parsed fragments.
 *
 * The abbreviations "d" and "w" map to the short keys "d" and "w"; "mon" is
 * treated as a month abbreviation here. The trailing spread re-applies
 * TIME_UNIT_DICTIONARY_NO_ABBR; every one of its keys is already listed above
 * with the same value.
 */
export const TIME_UNIT_DICTIONARY = {
    "s": "second", "sec": "second", "second": "second", "seconds": "second",
    "m": "minute", "min": "minute", "mins": "minute", "minute": "minute", "minutes": "minute",
    "h": "hour", "hr": "hour", "hrs": "hour", "hour": "hour", "hours": "hour",
    "d": "d", "day": "d", "days": "d",
    "w": "w", "week": "week", "weeks": "week",
    "mo": "month", "mon": "month", "mos": "month", "month": "month", "months": "month",
    "qtr": "quarter", "quarter": "quarter", "quarters": "quarter",
    "y": "year", "yr": "year", "year": "year", "years": "year",
    ...TIME_UNIT_DICTIONARY_NO_ABBR,
};
/**
 * Regex source (a single non-capturing group) for the amount part of an
 * expression such as "three", "2", "1.5", "half an", "a few" or "a couple of".
 * The alternatives are tried in the order listed below.
 */
export const NUMBER_PATTERN = `(?:${[
    // Number words taken from INTEGER_WORD_DICTIONARY.
    matchAnyPattern(INTEGER_WORD_DICTIONARY),
    // Digits only.
    "[0-9]+",
    // Digits with a decimal fraction.
    "[0-9]+\\.[0-9]+",
    // "half", optionally followed by "a" / "an".
    "half(?:\\s{0,2}an?)?",
    // "a" / "an" as a whole word, optionally followed by "few".
    "an?\\b(?:\\s{0,2}few)?",
    "few",
    "several",
    "the",
    // "couple", optionally preceded by "a" and/or followed by "of".
    "a?\\s{0,2}couple\\s{0,2}(?:of)?",
].join("|")})`;
/**
 * Converts amount text (as matched by NUMBER_PATTERN) into a number.
 *
 * Resolution order, after lower-casing the input:
 *  1. a key of INTEGER_WORD_DICTIONARY -> its value;
 *  2. exactly "a", "an" or "the" -> 1;
 *  3. text containing "few" -> 3, "half" -> 0.5, "couple" -> 2, "several" -> 7;
 *  4. anything else -> parseFloat of the text.
 *
 * @param {string} match - The amount text to convert.
 * @returns {number} The numeric value, or NaN when the text is not recognised.
 */
export function parseNumberPattern(match) {
    const num = match.toLowerCase();
    // Own-property check: a bare `dictionary[num] !== undefined` test would
    // also accept inherited members such as "constructor" or "__proto__" and
    // hand back a function/object instead of a number.
    if (Object.prototype.hasOwnProperty.call(INTEGER_WORD_DICTIONARY, num)) {
        return INTEGER_WORD_DICTIONARY[num];
    }
    if (num === "a" || num === "an" || num === "the") {
        return 1;
    }
    // Substring checks so that phrases like "a few" or "a couple of" resolve too.
    if (num.includes("few")) {
        return 3;
    }
    if (num.includes("half")) {
        return 0.5;
    }
    if (num.includes("couple")) {
        return 2;
    }
    if (num.includes("several")) {
        return 7;
    }
    return parseFloat(num);
}
/**
 * Regex source (a single non-capturing group) for an ordinal: either a word
 * from ORDINAL_WORD_DICTIONARY, or one or two digits with an optional
 * "st" / "nd" / "rd" / "th" suffix.
 */
export const ORDINAL_NUMBER_PATTERN = `(?:${[
    matchAnyPattern(ORDINAL_WORD_DICTIONARY),
    "[0-9]{1,2}(?:st|nd|rd|th)?",
].join("|")})`;
/**
 * Converts ordinal text (as matched by ORDINAL_NUMBER_PATTERN) into an integer.
 *
 * Words found in ORDINAL_WORD_DICTIONARY resolve to their listed value;
 * otherwise a trailing "st" / "nd" / "rd" / "th" is removed and the rest is
 * parsed as a base-10 integer.
 *
 * @param {string} match - The ordinal text, e.g. "twenty-first" or "21st".
 * @returns {number} The ordinal's integer value, or NaN when it cannot be parsed.
 */
export function parseOrdinalNumberPattern(match) {
    const text = match.toLowerCase();
    // Own-property check so inherited members such as "constructor" are not
    // mistaken for dictionary entries.
    if (Object.prototype.hasOwnProperty.call(ORDINAL_WORD_DICTIONARY, text)) {
        return ORDINAL_WORD_DICTIONARY[text];
    }
    const digits = text.replace(/(?:st|nd|rd|th)$/i, "");
    // Explicit radix: never let a "0x" prefix switch parsing to base 16.
    return parseInt(digits, 10);
}
/**
 * Regex source (a single non-capturing group) for a year. Alternatives, in
 * order:
 *  - 1 to 4 digits (no leading zero) followed by an era marker
 *    BE / AD / BCE / BC / CE, with up to two whitespace characters between;
 *  - a four-digit year starting with 1 or 2;
 *  - a two-digit year 50-99;
 *  - a two-digit year 20-25.
 *
 * "BCE" is listed before "BC" so that an unanchored match on "500 BCE" takes
 * the whole marker instead of stopping at "500 BC" and leaving a stray "E".
 */
export const YEAR_PATTERN = `(?:[1-9][0-9]{0,3}\\s{0,2}(?:BE|AD|BCE|BC|CE)|[1-2][0-9]{3}|[5-9][0-9]|2[0-5])`;
/**
 * Converts year text (as matched by YEAR_PATTERN) into a year number.
 *
 * Era markers are detected case-insensitively, in this order:
 *  - "BE": the number minus 543 (presumably Buddhist Era, given the offset);
 *  - "BC" / "BCE": the negated number;
 *  - "AD" / "CE": the number as written;
 *  - no marker: the number is passed to findMostLikelyADYear.
 *
 * @param {string} match - The year text, e.g. "2563 BE", "500 BC" or "99".
 * @returns {number} The resolved year; NaN when the text has no leading
 *   digits and an era marker is present.
 */
export function parseYear(match) {
    // All numbers are parsed with an explicit radix of 10 so that text with
    // a "0x" prefix is never read as hexadecimal.
    if (/BE/i.test(match)) {
        return parseInt(match.replace(/BE/i, ""), 10) - 543;
    }
    if (/BCE?/i.test(match)) {
        return -parseInt(match.replace(/BCE?/i, ""), 10);
    }
    if (/(AD|CE)/i.test(match)) {
        return parseInt(match.replace(/(AD|CE)/i, ""), 10);
    }
    return findMostLikelyADYear(parseInt(match, 10));
}
// One "<amount><up to 3 whitespace><unit>" pair. Group 1 captures the amount
// text, group 2 the unit text (any key of TIME_UNIT_DICTIONARY).
const SINGLE_TIME_UNIT_PATTERN = [
    `(${NUMBER_PATTERN})`,
    `(${matchAnyPattern(TIME_UNIT_DICTIONARY)})`,
].join("\\s{0,3}");
// Case-insensitive, non-global regex for a single amount/unit pair.
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");
// Same shape as SINGLE_TIME_UNIT_PATTERN, but only spelled-out unit words.
const SINGLE_TIME_UNIT_NO_ABBR_PATTERN = [
    `(${NUMBER_PATTERN})`,
    `(${matchAnyPattern(TIME_UNIT_DICTIONARY_NO_ABBR)})`,
].join("\\s{0,3}");
// Separator between consecutive pairs: optional comma and/or "and", padded
// by whitespace.
const TIME_UNIT_CONNECTOR_PATTERN = "\\s{0,5},?(?:\\s*and)?\\s{0,5}";
// Optional "about" / "around" qualifier in front of the first pair.
const APPROXIMATION_PREFIX_PATTERN = "(?:(?:about|around)\\s{0,3})?";
// Patterns for a run of amount/unit pairs, assembled by repeatedTimeunitPattern
// from the prefix, the single-pair pattern and the connector.
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern(
    APPROXIMATION_PREFIX_PATTERN,
    SINGLE_TIME_UNIT_PATTERN,
    TIME_UNIT_CONNECTOR_PATTERN
);
export const TIME_UNITS_NO_ABBR_PATTERN = repeatedTimeunitPattern(
    APPROXIMATION_PREFIX_PATTERN,
    SINGLE_TIME_UNIT_NO_ABBR_PATTERN,
    TIME_UNIT_CONNECTOR_PATTERN
);
/**
 * Extracts every "<amount> <unit>" pair from a piece of text and collects
 * them into a fragments object keyed by unit.
 *
 * The text is scanned left to right with SINGLE_TIME_UNIT_REGEX; each match
 * is handed to collectDateTimeFragment, and scanning resumes right after the
 * END of that match.
 *
 * @param {string} timeunitText - Text such as "about 15 mins" or
 *   "2 hours and 30 minutes".
 * @returns {Object<string, number>|null} Collected amounts by unit key, or
 *   null when no fragment was collected.
 */
export function parseTimeUnits(timeunitText) {
    const fragments = {};
    let remainingText = timeunitText;
    let match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    while (match) {
        collectDateTimeFragment(fragments, match);
        // The regex is not anchored, so a match may start after leading text
        // (e.g. "about "). Skip past match.index + length; cutting only
        // match[0].length characters from the front would leave the tail of
        // the match in place and re-read "about 15 mins" as "5 mins".
        remainingText = remainingText.substring(match.index + match[0].length).trim();
        match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    }
    if (Object.keys(fragments).length === 0) {
        return null;
    }
    return fragments;
}
/**
 * Records one amount/unit match into the fragments object.
 *
 * Matches consisting solely of ASCII letters are ignored. Otherwise the
 * amount (group 1) is converted with parseNumberPattern and stored under the
 * unit key that TIME_UNIT_DICTIONARY gives for the lower-cased unit text
 * (group 2), replacing any value already stored for that unit.
 *
 * @param {Object<string, number>} fragments - Accumulator, mutated in place.
 * @param {Array<string>} match - Regex match: [whole text, amount, unit].
 */
function collectDateTimeFragment(fragments, match) {
    const [wholeText, amountText, unitText] = match;
    const isLettersOnly = /^[a-zA-Z]+$/.test(wholeText);
    if (!isLettersOnly) {
        const amount = parseNumberPattern(amountText);
        const unitKey = TIME_UNIT_DICTIONARY[unitText.toLowerCase()];
        fragments[unitKey] = amount;
    }
}
//# sourceMappingURL=constants.js.map