UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

396 lines (346 loc) 14 kB
/* */ var moment = require('moment'); var Parser = require('../parser').Parser; var ParsedResult = require('../../result').ParsedResult; var ParsedComponents = require('../../result').ParsedComponents; var util = require('../../utils/ZH-Hant.js'); var patternString1 = '(?:由|從|自)?' + '(?:' + '(今|明|聽|昨|尋|琴)(早|朝|晚)|' + '(上(?:午|晝)|朝(?:早)|早(?:上)|下(?:午|晝)|晏(?:晝)|晚(?:上)|夜(?:晚)?|中(?:午)|凌(?:晨))|' + '(今|明|聽|昨|尋|琴)(?:日|天)' + '(?:[\\s,,]*)' + '(?:(上(?:午|晝)|朝(?:早)|早(?:上)|下(?:午|晝)|晏(?:晝)|晚(?:上)|夜(?:晚)?|中(?:午)|凌(?:晨)))?' + ')?' + '(?:[\\s,,]*)' + '(?:(\\d+|[' + Object.keys(util.NUMBER).join('') + ']+)(?:\\s*)(?:點|時|:|:)' + '(?:\\s*)' + '(\\d+|半|正|整|[' + Object.keys(util.NUMBER).join('') + ']+)?(?:\\s*)(?:分|:|:)?' + '(?:\\s*)' + '(\\d+|[' + Object.keys(util.NUMBER).join('') + ']+)?(?:\\s*)(?:秒)?)' + '(?:\\s*(A\.M\.|P\.M\.|AM?|PM?))?'; var patternString2 = '(?:\\s*(?:到|至|\\-|\\–|\\~|\\〜)\\s*)' + '(?:' + '(今|明|聽|昨|尋|琴)(早|朝|晚)|' + '(上(?:午|晝)|朝(?:早)|早(?:上)|下(?:午|晝)|晏(?:晝)|晚(?:上)|夜(?:晚)?|中(?:午)|凌(?:晨))|' + '(今|明|聽|昨|尋|琴)(?:日|天)' + '(?:[\\s,,]*)' + '(?:(上(?:午|晝)|朝(?:早)|早(?:上)|下(?:午|晝)|晏(?:晝)|晚(?:上)|夜(?:晚)?|中(?:午)|凌(?:晨)))?' + ')?' + '(?:[\\s,,]*)' + '(?:(\\d+|[' + Object.keys(util.NUMBER).join('') + ']+)(?:\\s*)(?:點|時|:|:)' + '(?:\\s*)' + '(\\d+|半|正|整|[' + Object.keys(util.NUMBER).join('') + ']+)?(?:\\s*)(?:分|:|:)?' + '(?:\\s*)' + '(\\d+|[' + Object.keys(util.NUMBER).join('') + ']+)?(?:\\s*)(?:秒)?)' + '(?:\\s*(A\.M\.|P\.M\.|AM?|PM?))?'; var FIRST_REG_PATTERN = new RegExp(patternString1, 'i'); var SECOND_REG_PATTERN = new RegExp(patternString2, 'i'); var DAY_GROUP_1 = 1; var ZH_AM_PM_HOUR_GROUP_1 = 2; var ZH_AM_PM_HOUR_GROUP_2 = 3; var DAY_GROUP_3 = 4; var ZH_AM_PM_HOUR_GROUP_3 = 5; var HOUR_GROUP = 6; var MINUTE_GROUP = 7; var SECOND_GROUP = 8; var AM_PM_HOUR_GROUP = 9; exports.Parser = function ZHHantTimeExpressionParser() { Parser.apply(this, arguments); this.pattern = function() { return FIRST_REG_PATTERN; }; this.extract = function(text, ref, match, opt) { // This pattern can be overlaped Ex. [12] AM, 1[2] AM if (match.index > 0 && text[match.index - 1].match(/\w/)) return null; var refMoment = moment(ref); var result = new ParsedResult(); result.ref = ref; result.index = match.index; result.text = match[0]; result.tags.ZHTimeExpressionParser = true; var startMoment = refMoment.clone(); // ----- Day if (match[DAY_GROUP_1]) { var day1 = match[DAY_GROUP_1]; if (day1 == '明' || day1 == '聽') { // Check not "Tomorrow" on late night if(refMoment.hour() > 1) { startMoment.add(1, 'day'); } } else if (day1 == '昨' || day1 == '尋' || day1 == '琴') { startMoment.add(-1, 'day'); } result.start.assign('day', startMoment.date()); result.start.assign('month', startMoment.month() + 1); result.start.assign('year', startMoment.year()); } else if (match[DAY_GROUP_3]) { var day3 = match[DAY_GROUP_3]; if (day3 == '明' || day3 == '聽') { startMoment.add(1, 'day'); } else if (day3 == '昨' || day3 == '尋' || day3 == '琴') { startMoment.add(-1, 'day'); } result.start.assign('day', startMoment.date()); result.start.assign('month', startMoment.month() + 1); result.start.assign('year', startMoment.year()); } else { result.start.imply('day', startMoment.date()); result.start.imply('month', startMoment.month() + 1); result.start.imply('year', startMoment.year()); } var hour = 0; var minute = 0; var meridiem = -1; // ----- Second if (match[SECOND_GROUP]) { var second = parseInt(match[SECOND_GROUP]); if (isNaN(second)) { second = util.zhStringToNumber(match[SECOND_GROUP]); } if (second >= 60) return null; result.start.assign('second', second); } hour = parseInt(match[HOUR_GROUP]); if (isNaN(hour)) { hour = util.zhStringToNumber(match[HOUR_GROUP]); } // ----- Minutes if (match[MINUTE_GROUP]) { if (match[MINUTE_GROUP] == '半') { minute = 30; } else if (match[MINUTE_GROUP] == '正' || match[MINUTE_GROUP] == '整') { minute = 0; } else { minute = parseInt(match[MINUTE_GROUP]); if (isNaN(minute)) { minute = util.zhStringToNumber(match[MINUTE_GROUP]); } } } else if (hour > 100) { minute = hour % 100; hour = parseInt(hour / 100); } if (minute >= 60) { return null; } if (hour > 24) { return null; } if (hour >= 12) { meridiem = 1; } // ----- AM & PM if (match[AM_PM_HOUR_GROUP]) { if (hour > 12) return null; var ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase(); if (ampm == "a") { meridiem = 0; if (hour == 12) hour = 0; } if (ampm == "p") { meridiem = 1; if (hour != 12) hour += 12; } } else if (match[ZH_AM_PM_HOUR_GROUP_1]) { var zhAMPMString1 = match[ZH_AM_PM_HOUR_GROUP_1]; var zhAMPM1 = zhAMPMString1[0]; if (zhAMPM1 == '朝' || zhAMPM1 == '早') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM1 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } else if (match[ZH_AM_PM_HOUR_GROUP_2]) { var zhAMPMString2 = match[ZH_AM_PM_HOUR_GROUP_2]; var zhAMPM2 = zhAMPMString2[0]; if (zhAMPM2 == '上' || zhAMPM2 == '朝' || zhAMPM2 == '早' || zhAMPM2 == '凌') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM2 == '下' || zhAMPM2 == '晏' || zhAMPM2 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } else if (match[ZH_AM_PM_HOUR_GROUP_3]) { var zhAMPMString3 = match[ZH_AM_PM_HOUR_GROUP_3]; var zhAMPM3 = zhAMPMString3[0]; if (zhAMPM3 == '上' || zhAMPM3 == '朝' || zhAMPM3 == '早' || zhAMPM3 == '凌') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM3 == '下' || zhAMPM3 == '晏' || zhAMPM3 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } result.start.assign('hour', hour); result.start.assign('minute', minute); if (meridiem >= 0) { result.start.assign('meridiem', meridiem); } else { if (hour < 12) { result.start.imply('meridiem', 0); } else { result.start.imply('meridiem', 1); } } // ============================================================== // Extracting the 'to' chunk // ============================================================== match = SECOND_REG_PATTERN.exec(text.substring(result.index + result.text.length)); if (!match) { // Not accept number only result if (result.text.match(/^\d+$/)) { return null; } return result; } var endMoment = startMoment.clone(); result.end = new ParsedComponents(null, null); // ----- Day if (match[DAY_GROUP_1]) { var day1 = match[DAY_GROUP_1]; if (day1 == '明' || day1 == '聽') { // Check not "Tomorrow" on late night if(refMoment.hour() > 1) { endMoment.add(1, 'day'); } } else if (day1 == '昨' || day1 == '尋' || day1 == '琴') { endMoment.add(-1, 'day'); } result.end.assign('day', endMoment.date()); result.end.assign('month', endMoment.month() + 1); result.end.assign('year', endMoment.year()); } else if (match[DAY_GROUP_3]) { var day3 = match[DAY_GROUP_3]; if (day3 == '明' || day3 == '聽') { endMoment.add(1, 'day'); } else if (day3 == '昨' || day3 == '尋' || day3 == '琴') { endMoment.add(-1, 'day'); } result.end.assign('day', endMoment.date()); result.end.assign('month', endMoment.month() + 1); result.end.assign('year', endMoment.year()); } else { result.end.imply('day', endMoment.date()); result.end.imply('month', endMoment.month() + 1); result.end.imply('year', endMoment.year()); } hour = 0; minute = 0; meridiem = -1; // ----- Second if (match[SECOND_GROUP]) { var second = parseInt(match[SECOND_GROUP]); if (isNaN(second)) { second = util.zhStringToNumber(match[SECOND_GROUP]); } if (second >= 60) return null; result.end.assign('second', second); } hour = parseInt(match[HOUR_GROUP]); if (isNaN(hour)) { hour = util.zhStringToNumber(match[HOUR_GROUP]); } // ----- Minutes if (match[MINUTE_GROUP]) { if (match[MINUTE_GROUP] == '半') { minute = 30; } else if (match[MINUTE_GROUP] == '正' || match[MINUTE_GROUP] == '整') { minute = 0; } else { minute = parseInt(match[MINUTE_GROUP]); if (isNaN(minute)) { minute = util.zhStringToNumber(match[MINUTE_GROUP]); } } } else if (hour > 100) { minute = hour % 100; hour = parseInt(hour / 100); } if (minute >= 60) { return null; } if (hour > 24) { return null; } if (hour >= 12) { meridiem = 1; } // ----- AM & PM if (match[AM_PM_HOUR_GROUP]) { if (hour > 12) return null; var ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase(); if (ampm == "a") { meridiem = 0; if (hour == 12) hour = 0; } if (ampm == "p") { meridiem = 1; if (hour != 12) hour += 12; } if (!result.start.isCertain('meridiem')) { if (meridiem == 0) { result.start.imply('meridiem', 0); if (result.start.get('hour') == 12) { result.start.assign('hour', 0); } } else { result.start.imply('meridiem', 1); if (result.start.get('hour') != 12) { result.start.assign('hour', result.start.get('hour') + 12); } } } } else if (match[ZH_AM_PM_HOUR_GROUP_1]) { var zhAMPMString1 = match[ZH_AM_PM_HOUR_GROUP_1]; var zhAMPM1 = zhAMPMString1[0]; if (zhAMPM1 == '朝' || zhAMPM1 == '早') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM1 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } else if (match[ZH_AM_PM_HOUR_GROUP_2]) { var zhAMPMString2 = match[ZH_AM_PM_HOUR_GROUP_2]; var zhAMPM2 = zhAMPMString2[0]; if (zhAMPM2 == '上' || zhAMPM2 == '朝' || zhAMPM2 == '早' || zhAMPM2 == '凌') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM2 == '下' || zhAMPM2 == '晏' || zhAMPM2 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } else if (match[ZH_AM_PM_HOUR_GROUP_3]) { var zhAMPMString3 = match[ZH_AM_PM_HOUR_GROUP_3]; var zhAMPM3 = zhAMPMString3[0]; if (zhAMPM3 == '上' || zhAMPM3 == '朝' || zhAMPM3 == '早' || zhAMPM3 == '凌') { meridiem = 0; if (hour == 12) hour = 0; } else if (zhAMPM3 == '下' || zhAMPM3 == '晏' || zhAMPM3 == '晚') { meridiem = 1; if (hour != 12) hour += 12; } } result.text = result.text + match[0]; result.end.assign('hour', hour); result.end.assign('minute', minute); if (meridiem >= 0) { result.end.assign('meridiem', meridiem); } else { var startAtPM = result.start.isCertain('meridiem') && result.start.get('meridiem') == 1; if (startAtPM && result.start.get('hour') > hour) { // 10pm - 1 (am) result.end.imply('meridiem', 0); } else if (hour > 12) { result.end.imply('meridiem', 1); } } if (result.end.date().getTime() < result.start.date().getTime()) { result.end.imply('day', result.end.get('day') + 1) } return result; }; };