UNPKG

chrono-node

Version:

A natural language date parser in JavaScript

448 lines (372 loc) 14.4 kB
import { Parser, ParsingContext } from "../../chrono";
import { ParsingComponents, ParsingResult } from "../../results";
import { Meridiem } from "../../types";

/**
 * Builds the pattern for the first (or only) time of an expression.
 *
 * Capture groups, given a left boundary that is exactly one capturing group and a
 * prefix without capturing groups (the *_GROUP constants below rely on this):
 *   1 = left boundary, 2 = hour (1-4 digits), 3 = minute, 4 = second,
 *   5 = fraction of a second (1-6 digits), 6 = am/pm marker.
 *
 * Hour/minute may be separated by ".", ":" or the fullwidth colon (U+FF1A);
 * minute/second by ":" or the fullwidth colon. The fullwidth colon is written as
 * an escape so that it survives encoding normalisation of this file.
 */
// prettier-ignore
function primaryTimePattern(leftBoundary: string, primaryPrefix: string, primarySuffix: string, flags: string): RegExp {
    return new RegExp(
            `${leftBoundary}` +
            `${primaryPrefix}` +
            `(\\d{1,4})` +
            `(?:` +
                `(?:\\.|:|\\uFF1A)` +
                `(\\d{1,2})` +
                `(?:` +
                    `(?::|\\uFF1A)` +
                    `(\\d{2})` +
                    `(?:\\.(\\d{1,6}))?` +
                `)?` +
            `)?` +
            `(?:\\s*(a\\.m\\.|p\\.m\\.|am?|pm?))?` +
            `${primarySuffix}`,
        flags
    );
}

/**
 * Builds the pattern for the second time of a range (e.g. the " - 5pm" in "3pm - 5pm").
 *
 * The pattern is anchored at the start of the text it is run against and is always
 * case-insensitive. Group 1 is the connecting phrase; groups 2-6 have the same
 * meaning as in {@link primaryTimePattern}, so `followingPhase` must not add
 * capturing groups of its own.
 */
// prettier-ignore
function followingTimePatten(followingPhase: string, followingSuffix: string): RegExp {
    return new RegExp(
        `^(${followingPhase})` +
            `(\\d{1,4})` +
            `(?:` +
                `(?:\\.|\\:|\\uFF1A)` +
                `(\\d{1,2})` +
                `(?:` +
                    `(?:\\.|\\:|\\uFF1A)` +
                    `(\\d{1,2})(?:\\.(\\d{1,6}))?` +
                `)?` +
            `)?` +
            `(?:\\s*(a\\.m\\.|p\\.m\\.|am?|pm?))?` +
            `${followingSuffix}`,
        "i"
    );
}

// Capture-group indexes shared by the primary and the following pattern.
const HOUR_GROUP = 2;
const MINUTE_GROUP = 3;
const SECOND_GROUP = 4;
const MILLI_SECOND_GROUP = 5;
const AM_PM_HOUR_GROUP = 6;

/**
 * Converts the captured fraction-of-a-second digits into whole milliseconds.
 *
 * The digits are a decimal fraction, so "5" is 0.5s = 500ms and "05" is 50ms;
 * anything beyond millisecond precision is truncated. The result is always 0-999.
 */
function fractionToMillisecond(fraction: string): number {
    return parseInt((fraction + "00").substring(0, 3), 10);
}

/**
 * Base class for locale-specific time-expression parsers ("at 3pm", "10:30 - 11:45", ...).
 *
 * Subclasses supply the words that may precede a time (`primaryPrefix`) and the
 * phrase that connects the two ends of a range (`followingPhase`); this class owns
 * the numeric pattern, the component extraction and the sanity checks that reject
 * bare numbers which are unlikely to be times.
 */
export abstract class AbstractTimeExpressionParser implements Parser {
    /** Regex source placed before the hour; must not contain capturing groups. */
    abstract primaryPrefix(): string;

    /** Regex source of the range connector; must not contain capturing groups. */
    abstract followingPhase(): string;

    /** When true, ambiguous forms such as "at 1", "at 1.2" or "1230" are rejected. */
    strictMode: boolean;

    private cachedPrimaryPrefix: string | null = null;
    private cachedPrimarySuffix: string | null = null;
    private cachedPrimaryTimePattern: RegExp | null = null;

    private cachedFollowingPhase: string | null = null;
    private cachedFollowingSuffix: string | null = null;
    private cachedFollowingTimePatten: RegExp | null = null;

    constructor(strictMode = false) {
        this.strictMode = strictMode;
    }

    /** Flags used for the primary pattern. */
    patternFlags(): string {
        return "i";
    }

    /**
     * Regex source matched before the prefix. It must be exactly one capturing
     * group: `extract` removes the length of group 1 from the reported text.
     */
    primaryPatternLeftBoundary(): string {
        return `(^|\\s|T|\\b)`;
    }

    /** The primary time must not be followed by "/" and must end at a non-word character or the end of text. */
    primarySuffix(): string {
        return `(?!/)(?=\\W|$)`;
    }

    /** Same trailing constraint as {@link primarySuffix}, applied to the end time of a range. */
    followingSuffix(): string {
        return `(?!/)(?=\\W|$)`;
    }

    pattern(context: ParsingContext): RegExp {
        return this.getPrimaryTimePatternThroughCache();
    }

    /**
     * Turns a primary-pattern match into a parsing result, extending it to a range
     * when the text right after the match looks like an end time.
     *
     * `match.index` is advanced on every path so that the matching loop that reads
     * it afterwards resumes past the text already examined.
     *
     * @returns the result, or `null` when the match is not a plausible time.
     */
    extract(context: ParsingContext, match: RegExpMatchArray): ParsingResult | null {
        const startComponents = this.extractPrimaryTimeComponents(context, match);
        if (!startComponents) {
            // If the match seems like a year, e.g. "2013.12:...",
            // then skip the year part and try matching again.
            if (match[0].match(/^\d{4}/)) {
                match.index += 4; // Skip over potential overlapping pattern
                return null;
            }

            match.index += match[0].length; // Skip over potential overlapping pattern
            return null;
        }

        // Group 1 is the left boundary; it is not part of the reported text.
        const index = match.index + match[1].length;
        const text = match[0].substring(match[1].length);
        const result = context.createParsingResult(index, text, startComponents);
        match.index += match[0].length; // Skip over potential overlapping pattern

        const remainingText = context.text.substring(match.index);
        const followingPattern = this.getFollowingTimePatternThroughCache();
        const followingMatch = followingPattern.exec(remainingText);

        // Patterns "456-12", "2022-12" should not be time without proper context
        if (text.match(/^\d{3,4}/) && followingMatch) {
            // e.g. "2022-12"
            if (followingMatch[0].match(/^\s*([+-])\s*\d{2,4}$/)) {
                return null;
            }

            // e.g. "2022-12:01..."
            if (followingMatch[0].match(/^\s*([+-])\s*\d{2}\W\d{2}/)) {
                return null;
            }
        }

        if (
            !followingMatch ||
            // Pattern "YY.YY -XXXX" is more like timezone offset
            followingMatch[0].match(/^\s*([+-])\s*\d{3,4}$/)
        ) {
            return this.checkAndReturnWithoutFollowingPattern(result);
        }

        result.end = this.extractFollowingTimeComponents(context, followingMatch, result);
        if (result.end) {
            result.text += followingMatch[0];
        }

        return this.checkAndReturnWithFollowingPattern(result);
    }

    /**
     * Reads hour, minute, second, millisecond and meridiem from a primary match.
     *
     * @param strict currently unused; strictness is taken from `this.strictMode`.
     *     Kept so that existing callers and overrides keep compiling.
     * @returns the components, or `null` when the numbers cannot be a time.
     */
    extractPrimaryTimeComponents(
        context: ParsingContext,
        match: RegExpMatchArray,
        strict = false
    ): null | ParsingComponents {
        const components = context.createParsingComponents();
        let minute = 0;
        let meridiem: Meridiem | null = null;

        // ----- Hours
        let hour = parseInt(match[HOUR_GROUP], 10);
        if (hour > 100) {
            // Compact "HHMM" form, e.g. "1230": not allowed in strict mode or
            // when an explicit minute part follows.
            if (this.strictMode || match[MINUTE_GROUP] != null) {
                return null;
            }

            minute = hour % 100;
            hour = Math.floor(hour / 100);
        }

        if (hour > 24) {
            return null;
        }

        // ----- Minutes
        if (match[MINUTE_GROUP] != null) {
            if (match[MINUTE_GROUP].length === 1 && !match[AM_PM_HOUR_GROUP]) {
                // Skip single digit minute e.g. "at 1.1 xx"
                return null;
            }

            minute = parseInt(match[MINUTE_GROUP], 10);
        }

        if (minute >= 60) {
            return null;
        }

        if (hour > 12) {
            meridiem = Meridiem.PM;
        }

        // ----- AM & PM
        if (match[AM_PM_HOUR_GROUP] != null) {
            if (hour > 12) return null;

            const ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase();
            if (ampm === "a") {
                meridiem = Meridiem.AM;
                if (hour === 12) {
                    hour = 0; // 12am is midnight
                }
            }

            if (ampm === "p") {
                meridiem = Meridiem.PM;
                if (hour !== 12) {
                    hour += 12;
                }
            }
        }

        components.assign("hour", hour);
        components.assign("minute", minute);

        if (meridiem !== null) {
            components.assign("meridiem", meridiem);
        } else if (hour < 12) {
            components.imply("meridiem", Meridiem.AM);
        } else {
            components.imply("meridiem", Meridiem.PM);
        }

        // ----- Millisecond
        if (match[MILLI_SECOND_GROUP] != null) {
            components.assign("millisecond", fractionToMillisecond(match[MILLI_SECOND_GROUP]));
        }

        // ----- Second
        if (match[SECOND_GROUP] != null) {
            const second = parseInt(match[SECOND_GROUP], 10);
            if (second >= 60) return null;

            components.assign("second", second);
        }

        return components;
    }

    /**
     * Reads the end time of a range from a following-pattern match.
     *
     * Besides building the end components, an explicit am/pm on the end time is
     * propagated to `result.start` when the start's meridiem is not certain
     * (e.g. "3 - 5pm" makes the start 3pm).
     *
     * @returns the end components, or `null` when the numbers cannot be a time.
     */
    extractFollowingTimeComponents(
        context: ParsingContext,
        match: RegExpMatchArray,
        result: ParsingResult
    ): null | ParsingComponents {
        const components = context.createParsingComponents();

        // ----- Millisecond
        if (match[MILLI_SECOND_GROUP] != null) {
            components.assign("millisecond", fractionToMillisecond(match[MILLI_SECOND_GROUP]));
        }

        // ----- Second
        if (match[SECOND_GROUP] != null) {
            const second = parseInt(match[SECOND_GROUP], 10);
            if (second >= 60) return null;

            components.assign("second", second);
        }

        let hour = parseInt(match[HOUR_GROUP], 10);
        let minute = 0;
        let meridiem: Meridiem | null = null;

        // ----- Minute
        if (match[MINUTE_GROUP] != null) {
            minute = parseInt(match[MINUTE_GROUP], 10);
        } else if (hour > 100) {
            // Compact "HHMM" form
            minute = hour % 100;
            hour = Math.floor(hour / 100);
        }

        if (minute >= 60 || hour > 24) {
            return null;
        }

        if (hour >= 12) {
            meridiem = Meridiem.PM;
        }

        // ----- AM & PM
        if (match[AM_PM_HOUR_GROUP] != null) {
            if (hour > 12) {
                return null;
            }

            const ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase();
            if (ampm === "a") {
                meridiem = Meridiem.AM;
                if (hour === 12) {
                    hour = 0;
                    // An end time of 12am falls on the next day unless the day is known.
                    if (!components.isCertain("day")) {
                        components.imply("day", components.get("day") + 1);
                    }
                }
            }

            if (ampm === "p") {
                meridiem = Meridiem.PM;
                if (hour !== 12) hour += 12;
            }

            // Let the start inherit the explicit meridiem of the end.
            if (!result.start.isCertain("meridiem")) {
                if (meridiem === Meridiem.AM) {
                    result.start.imply("meridiem", Meridiem.AM);

                    if (result.start.get("hour") === 12) {
                        result.start.assign("hour", 0);
                    }
                } else {
                    result.start.imply("meridiem", Meridiem.PM);

                    if (result.start.get("hour") !== 12) {
                        result.start.assign("hour", result.start.get("hour") + 12);
                    }
                }
            }
        }

        components.assign("hour", hour);
        components.assign("minute", minute);

        if (meridiem !== null) {
            components.assign("meridiem", meridiem);
        } else {
            // No meridiem on the end time: guess it from the start time.
            const startAtPM = result.start.isCertain("meridiem") && result.start.get("hour") > 12;
            if (startAtPM) {
                if (result.start.get("hour") - 12 > hour) {
                    // 10pm - 1 (am)
                    components.imply("meridiem", Meridiem.AM);
                } else if (hour <= 12) {
                    components.assign("hour", hour + 12);
                    components.assign("meridiem", Meridiem.PM);
                }
            } else if (hour > 12) {
                components.imply("meridiem", Meridiem.PM);
            } else if (hour <= 12) {
                components.imply("meridiem", Meridiem.AM);
            }
        }

        // An end before the start is taken to be on the following day.
        if (components.date().getTime() < result.start.date().getTime()) {
            components.imply("day", components.get("day") + 1);
        }

        return components;
    }

    /**
     * Final plausibility filter for a single time (no range part).
     *
     * @returns `result` unchanged, or `null` when its text is a bare number that
     *     is unlikely to be a time.
     */
    private checkAndReturnWithoutFollowingPattern(result: ParsingResult): ParsingResult | null {
        // Single digit (e.g "1") should not be counted as time expression (without proper context)
        if (result.text.match(/^\d$/)) {
            return null;
        }

        // Three or more digits (e.g. "203", "2014") should not be counted as time expression (without proper context)
        if (result.text.match(/^\d\d\d+$/)) {
            return null;
        }

        // Instead of "am/pm", it ends with "a" or "p" (e.g "1a", "123p"), this seems unlikely
        if (result.text.match(/\d[apAP]$/)) {
            return null;
        }

        // If it ends only with numbers or dots
        const endingWithNumbers = result.text.match(/[^\d:.](\d[\d.]+)$/);
        if (endingWithNumbers) {
            const endingNumbers: string = endingWithNumbers[1];

            // In strict mode (e.g. "at 1" or "at 1.2"), this should not be accepted
            if (this.strictMode) {
                return null;
            }

            // If it ends only with dot single digit, e.g. "at 1.2"
            if (endingNumbers.includes(".") && !endingNumbers.match(/\d(\.\d{2})+$/)) {
                return null;
            }

            // If it ends only with numbers above 24, e.g. "at 25"
            const endingNumberVal = parseInt(endingNumbers, 10);
            if (endingNumberVal > 24) {
                return null;
            }
        }

        return result;
    }

    /**
     * Final plausibility filter for a time range.
     *
     * @returns `result` unchanged, or `null` when its text is a bare numeric range
     *     that is unlikely to be a time range.
     */
    private checkAndReturnWithFollowingPattern(result: ParsingResult): ParsingResult | null {
        // A bare "N-M" is more likely a numeric range or a year-month than a time range.
        if (result.text.match(/^\d+-\d+$/)) {
            return null;
        }

        // If it ends only with numbers or dots
        const endingWithNumbers = result.text.match(/[^\d:.](\d[\d.]+)\s*-\s*(\d[\d.]+)$/);
        if (endingWithNumbers) {
            // In strict mode (e.g. "at 1-3" or "at 1.2 - 2.3"), this should not be accepted
            if (this.strictMode) {
                return null;
            }

            const startingNumbers: string = endingWithNumbers[1];
            const endingNumbers: string = endingWithNumbers[2];

            // If it ends only with dot single digit, e.g. "at 1.2"
            if (endingNumbers.includes(".") && !endingNumbers.match(/\d(\.\d{2})+$/)) {
                return null;
            }

            // If either side is a number above 24, e.g. "at 25"
            const endingNumberVal = parseInt(endingNumbers, 10);
            const startingNumberVal = parseInt(startingNumbers, 10);
            if (endingNumberVal > 24 || startingNumberVal > 24) {
                return null;
            }
        }

        return result;
    }

    /**
     * Returns the primary pattern, rebuilding it only when the prefix or suffix
     * changed since the last call. The left boundary and the flags are not part of
     * the cache key, so they are expected to stay constant for an instance.
     */
    getPrimaryTimePatternThroughCache(): RegExp {
        const primaryPrefix = this.primaryPrefix();
        const primarySuffix = this.primarySuffix();

        if (
            this.cachedPrimaryTimePattern !== null &&
            this.cachedPrimaryPrefix === primaryPrefix &&
            this.cachedPrimarySuffix === primarySuffix
        ) {
            return this.cachedPrimaryTimePattern;
        }

        const pattern = primaryTimePattern(
            this.primaryPatternLeftBoundary(),
            primaryPrefix,
            primarySuffix,
            this.patternFlags()
        );
        this.cachedPrimaryTimePattern = pattern;
        this.cachedPrimaryPrefix = primaryPrefix;
        this.cachedPrimarySuffix = primarySuffix;
        return pattern;
    }

    /**
     * Returns the following (range end) pattern, rebuilding it only when the
     * connecting phrase or the suffix changed since the last call.
     */
    getFollowingTimePatternThroughCache(): RegExp {
        const followingPhase = this.followingPhase();
        const followingSuffix = this.followingSuffix();

        if (
            this.cachedFollowingTimePatten !== null &&
            this.cachedFollowingPhase === followingPhase &&
            this.cachedFollowingSuffix === followingSuffix
        ) {
            return this.cachedFollowingTimePatten;
        }

        const pattern = followingTimePatten(followingPhase, followingSuffix);
        this.cachedFollowingTimePatten = pattern;
        this.cachedFollowingPhase = followingPhase;
        this.cachedFollowingSuffix = followingSuffix;
        return pattern;
    }
}