chrono-node
Version:
A natural language date parser in JavaScript
175 lines (160 loc) • 4.29 kB
text/typescript
import { OpUnitType, QUnitType } from "dayjs";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
/**
 * Spanish weekday words mapped to a day index, from "domingo" = 0 through
 * "sábado" = 6. Each day is listed by full name and three-letter abbreviation;
 * days whose spelling carries an accent are also listed without it.
 */
export const WEEKDAY_DICTIONARY: { [word: string]: number } = {
    domingo: 0, dom: 0,
    lunes: 1, lun: 1,
    martes: 2, mar: 2,
    miércoles: 3, miercoles: 3, mié: 3, mie: 3,
    jueves: 4, jue: 4,
    viernes: 5, vie: 5,
    sábado: 6, sabado: 6, sáb: 6, sab: 6,
};
/**
 * Spanish month words mapped to a 1-based month number (enero = 1 ... diciembre = 12).
 * Every month appears as its full name, its three-letter abbreviation, and that
 * abbreviation followed by a period; September also accepts the "setiembre" spelling.
 */
export const MONTH_DICTIONARY: { [word: string]: number } = {
    "enero": 1, "ene": 1, "ene.": 1,
    "febrero": 2, "feb": 2, "feb.": 2,
    "marzo": 3, "mar": 3, "mar.": 3,
    "abril": 4, "abr": 4, "abr.": 4,
    "mayo": 5, "may": 5, "may.": 5,
    "junio": 6, "jun": 6, "jun.": 6,
    "julio": 7, "jul": 7, "jul.": 7,
    "agosto": 8, "ago": 8, "ago.": 8,
    "septiembre": 9, "setiembre": 9, "sep": 9, "sep.": 9,
    "octubre": 10, "oct": 10, "oct.": 10,
    "noviembre": 11, "nov": 11, "nov.": 11,
    "diciembre": 12, "dic": 12, "dic.": 12,
};
/**
 * Spanish cardinal number words mapped to their integer value.
 * Only "uno" (1) through "trece" (13) are covered.
 */
export const INTEGER_WORD_DICTIONARY: { [word: string]: number } = {
    uno: 1,
    dos: 2,
    tres: 3,
    cuatro: 4,
    cinco: 5,
    seis: 6,
    siete: 7,
    ocho: 8,
    nueve: 9,
    diez: 10,
    once: 11,
    doce: 12,
    trece: 13,
};
/**
 * Spanish time-unit words (singular, plural and common abbreviations) mapped to
 * the dayjs unit name they stand for.
 *
 * Unaccented "dia"/"dias" are accepted alongside "día"/"días", mirroring how
 * WEEKDAY_DICTIONARY accepts both accented and unaccented spellings.
 * "trimestre(s)" is accepted for a calendar quarter in addition to the
 * pre-existing "cuarto(s)" keys, and "seg" complements the English-style "sec".
 */
export const TIME_UNIT_DICTIONARY: { [word: string]: OpUnitType | QUnitType } = {
    // seconds
    "sec": "second",
    "seg": "second",
    "segundo": "second",
    "segundos": "second",
    // minutes
    "min": "minute",
    "mins": "minute",
    "minuto": "minute",
    "minutos": "minute",
    // hours
    "h": "hour",
    "hr": "hour",
    "hrs": "hour",
    "hora": "hour",
    "horas": "hour",
    // days (note: mapped to the short dayjs unit "d")
    "día": "d",
    "días": "d",
    "dia": "d",
    "dias": "d",
    // weeks
    "semana": "week",
    "semanas": "week",
    // months
    "mes": "month",
    "meses": "month",
    // quarters
    "cuarto": "quarter",
    "cuartos": "quarter",
    "trimestre": "quarter",
    "trimestres": "quarter",
    // years
    "año": "year",
    "años": "year",
};
//-----------------------------
/**
 * Regex source (one non-capturing group) for a quantity written in Spanish text.
 *
 * Alternatives, in order:
 *  - whatever `matchAnyPattern(INTEGER_WORD_DICTIONARY)` produces
 *    (presumably an alternation of the dictionary's number words — confirm in utils/pattern),
 *  - a decimal number, listed before the plain integer so "1.5" is not
 *    pre-empted by the integer alternative matching just "1",
 *  - a plain integer,
 *  - the indefinite quantifiers "algun(o)(s)", "uno(s)", "un", "una",
 *  - "medio" / "media" (half), which parseNumberPattern maps to 0.5.
 *
 * Fixes relative to the previous pattern: the French leftover `demi-?` (which
 * parseNumberPattern could only turn into NaN) is replaced by the Spanish
 * `medi[oa]`, and the `un?` alternative, which also matched a bare "u", is gone.
 */
export const NUMBER_PATTERN = `(?:${matchAnyPattern(
    INTEGER_WORD_DICTIONARY
)}|[0-9]+\\.[0-9]+|[0-9]+|algunos?|unos?|una?|medi[oa])`;
/**
 * Converts a quantity token into a number.
 *
 * The token is lower-cased, then resolved in this order:
 *  1. an entry of INTEGER_WORD_DICTIONARY;
 *  2. exactly "un", "una" or "uno" -> 1;
 *  3. text containing "alguno(s)" or "uno(s)" -> 3 (an indefinite "a few");
 *  4. text containing "medi" (optionally followed by "a") -> 0.5;
 *  5. otherwise `parseFloat`, which yields NaN for non-numeric text.
 *
 * @param match the quantity text to convert
 * @returns the numeric value, or NaN when nothing above applies
 */
export function parseNumberPattern(match: string): number {
    const lowered = match.toLowerCase();

    const wordValue = INTEGER_WORD_DICTIONARY[lowered];
    if (wordValue !== undefined) {
        return wordValue;
    }

    switch (lowered) {
        case "un":
        case "una":
        case "uno":
            return 1;
    }

    if (/algunos?/.test(lowered) || /unos?/.test(lowered)) {
        return 3;
    }
    if (/media?/.test(lowered)) {
        return 0.5;
    }
    return parseFloat(lowered);
}
//-----------------------------
// Examples:
// 88 d. C.
// 234 a. C.
// 234 AC
/**
 * Regex source for a year: 1-4 digits, optionally followed by an era suffix
 * ("a.C.", "d.C." or "a.d.", with the periods and inner whitespace optional).
 *
 * The negative lookahead `(?![^\s]\d)` rejects digits that are immediately
 * followed by a non-whitespace character and then another digit.
 *
 * The era letter is matched with `[ad]`; the previous `[a|d]` was a character
 * class that also accepted a literal "|" (e.g. "234 |c").
 */
export const YEAR_PATTERN = "[0-9]{1,4}(?![^\\s]\\d)(?:\\s*[ad]\\.?\\s*c\\.?|\\s*a\\.?\\s*d\\.?)?";
/**
 * Turns matched year text into a numeric year.
 *
 * - Bare 1-4 digit text: values below 100 are expanded to a full year
 *   (51-99 -> 1951-1999, 0-50 -> 2000-2050); larger values are returned as-is.
 * - Text containing an "a.C."-style marker (case-insensitive, periods and inner
 *   whitespace optional): the marker is removed and the year is negated.
 * - Anything else: the leading integer as read by `parseInt`.
 *
 * @param match the year text, possibly with an era suffix
 * @returns the year; negative for "a.C." years
 */
export function parseYear(match: string): number {
    const isBareNumber = /^[0-9]{1,4}$/.test(match);

    if (!isBareNumber) {
        const beforeChristMarker = /a\.?\s*c\.?/i;
        if (beforeChristMarker.test(match)) {
            return -parseInt(match.replace(beforeChristMarker, ""));
        }
        return parseInt(match);
    }

    const value = parseInt(match);
    if (value >= 100) {
        return value;
    }
    // Two-digit shorthand: pivot at 50.
    return value > 50 ? value + 1900 : value + 2000;
}
// Source for one "<amount> <unit>" fragment. Capture group 1 wraps NUMBER_PATTERN and
// group 2 wraps the unit alternatives; collectDateTimeFragment reads them as match[1]
// and match[2]. Up to five whitespace characters may follow each part.
const SINGLE_TIME_UNIT_PATTERN =
    "(" + NUMBER_PATTERN + ")\\s{0,5}(" + matchAnyPattern(TIME_UNIT_DICTIONARY) + ")\\s{0,5}";

// Case-insensitive, unanchored matcher used by parseTimeUnits to pull fragments out one at a time.
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

// Pattern for a run of fragments, built by repeatedTimeunitPattern with an empty prefix.
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN);
/**
 * Parses text such as "2 horas 30 minutos" into a map of time unit -> amount.
 *
 * SINGLE_TIME_UNIT_REGEX is applied repeatedly; each match is handed to
 * collectDateTimeFragment, and scanning resumes right after that match.
 * Because fragments are stored by plain assignment, a later fragment with the
 * same unit replaces an earlier one.
 *
 * @param timeunitText text containing one or more "<amount> <unit>" fragments
 * @returns the collected amounts keyed by unit; empty when nothing matches
 */
export function parseTimeUnits(timeunitText): { [c in OpUnitType | QUnitType]?: number } {
    const fragments = {};
    let remainingText = timeunitText;
    let match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    while (match) {
        collectDateTimeFragment(fragments, match);
        // The regex is unanchored, so the match may start after some leading text.
        // Advance past the END of the match (index + length); advancing by the
        // length alone could cut into the fragment just consumed and re-parse its
        // tail (e.g. "yy 12 h" would be read again as "2 h").
        remainingText = remainingText.substring(match.index + match[0].length);
        match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    }
    return fragments as { [c in OpUnitType | QUnitType]?: number };
}
/**
 * Records one matched "<amount> <unit>" fragment.
 *
 * @param fragments accumulator object; mutated in place, and an existing entry
 *                  for the same unit is overwritten
 * @param match regex match whose group 1 is the amount text and group 2 the unit word
 */
function collectDateTimeFragment(fragments, match) {
    const amount = parseNumberPattern(match[1]);
    const unitWord = match[2].toLowerCase();
    fragments[TIME_UNIT_DICTIONARY[unitWord]] = amount;
}