UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

253 lines (201 loc) 7.36 kB
/* */ var moment = require('moment'); var Parser = require('../parser').Parser; var ParsedResult = require('../../result').ParsedResult; var ParsedComponents = require('../../result').ParsedComponents; var FIRST_REG_PATTERN = new RegExp("(^|\\s|T)" + "(?:(?:um|von)\\s*)?" + "(\\d{1,4}|mittags?|mitternachts?)" + "(?:" + "(?:\\.|\\:|\\:)(\\d{1,2})" + "(?:" + "(?:\\:|\\:)(\\d{2})" + ")?" + ")?" + "(?:\\s*uhr)?" + "(?:\\s*(morgens|vormittags|mittags|nachmittags|abends|nachts))?" + "(?=\\W|$)", 'i'); var SECOND_REG_PATTERN = new RegExp("^\\s*" + "(\\-|\\–|\\~|\\〜|bis|\\?)\\s*" + "(\\d{1,4})" + "(?:" + "(?:\\.|\\:|\\:)(\\d{1,2})" + "(?:" + "(?:\\.|\\:|\\:)(\\d{1,2})" + ")?" + ")?" + "(?:\\s*(morgens|vormittags|mittags|nachmittags|abends|nachts))?" + "(?=\\W|$)", 'i'); var HOUR_GROUP = 2; var MINUTE_GROUP = 3; var SECOND_GROUP = 4; var AM_PM_HOUR_GROUP = 5; exports.Parser = function DETimeExpressionParser() { Parser.apply(this, arguments); this.pattern = function() { return FIRST_REG_PATTERN; } this.extract = function(text, ref, match, opt){ // This pattern can be overlaped Ex. [12] AM, 1[2] AM if (match.index > 0 && text[match.index-1].match(/\w/)) return null; var refMoment = moment(ref); var result = new ParsedResult(); result.ref = ref; result.index = match.index + match[1].length; result.text = match[0].substring(match[1].length); result.tags['DETimeExpressionParser'] = true; result.start.imply('day', refMoment.date()); result.start.imply('month', refMoment.month()+1); result.start.imply('year', refMoment.year()); var hour = 0; var minute = 0; var meridiem = -1; // ----- Second if(match[SECOND_GROUP] != null){ var second = parseInt(match[SECOND_GROUP]); if(second >= 60) return null; result.start.assign('second', second); } // ----- Hours if (/mittags?/i.test(match[HOUR_GROUP])) { meridiem = 1; hour = 12; } else if (/mitternachts?/i.test(match[HOUR_GROUP])) { meridiem = 0; hour = 0; } else { hour = parseInt(match[HOUR_GROUP]); } // ----- Minutes if(match[MINUTE_GROUP] != null){ minute = parseInt(match[MINUTE_GROUP]); } else if(hour > 100) { minute = hour%100; hour = parseInt(hour/100); } if(minute >= 60) { return null; } if(hour > 24) { return null; } if (hour >= 12) { meridiem = 1; } // ----- AM & PM if (match[AM_PM_HOUR_GROUP] != null) { if (hour > 12) return null; var ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase(); if (ampm === 'morgens' || ampm === 'vormittags') { meridiem = 0; if(hour == 12) hour = 0; } else { meridiem = 1; if(hour != 12) hour += 12; } } result.start.assign('hour', hour); result.start.assign('minute', minute); if (meridiem >= 0) { result.start.assign('meridiem', meridiem); } else { if (hour < 12) { result.start.imply('meridiem', 0); } else { result.start.imply('meridiem', 1); } } // ============================================================== // Extracting the 'to' chunk // ============================================================== match = SECOND_REG_PATTERN.exec(text.substring(result.index + result.text.length)); if (!match) { // Not accept number only result if (result.text.match(/^\d+$/)) { return null; } return result; } // Pattern "YY.YY -XXXX" is more like timezone offset if (match[0].match(/^\s*(\+|\-)\s*\d{3,4}$/)) { return result; } if(result.end == null){ result.end = new ParsedComponents(null, result.start.date()); } var hour = 0; var minute = 0; var meridiem = -1; // ----- Second if(match[SECOND_GROUP] != null){ var second = parseInt(match[SECOND_GROUP]); if(second >= 60) return null; result.end.assign('second', second); } hour = parseInt(match[2]); // ----- Minute if (match[MINUTE_GROUP]!= null) { minute = parseInt(match[MINUTE_GROUP]); if(minute >= 60) return result; } else if (hour > 100) { minute = hour%100; hour = parseInt(hour/100); } if(minute >= 60) { return null; } if(hour > 24) { return null; } if (hour >= 12) { meridiem = 1; } // ----- AM & PM if (match[AM_PM_HOUR_GROUP] != null) { if (hour > 12) return null; var ampm = match[AM_PM_HOUR_GROUP][0].toLowerCase(); if (ampm === 'morgens' || ampm === 'vormittags') { meridiem = 0; if(hour == 12) { hour = 0; if (!result.end.isCertain('day')) { result.end.imply('day', result.end.get('day') + 1); } } } else { meridiem = 1; if(hour != 12) hour += 12; } if (!result.start.isCertain('meridiem')) { if (meridiem == 0) { result.start.imply('meridiem', 0); if (result.start.get('hour') == 12) { result.start.assign('hour', 0); } } else { result.start.imply('meridiem', 1); if (result.start.get('hour') != 12) { result.start.assign('hour', result.start.get('hour') + 12); } } } } result.text = result.text + match[0]; result.end.assign('hour', hour); result.end.assign('minute', minute); if (meridiem >= 0) { result.end.assign('meridiem', meridiem); } else { var startAtPM = result.start.isCertain('meridiem') && result.start.get('meridiem') == 1; if (startAtPM && result.start.get('hour') > hour) { // 10pm - 1 (am) result.end.imply('meridiem', 0); } else if (hour > 12) { result.end.imply('meridiem', 1); } } if (result.end.date().getTime() < result.start.date().getTime()) { result.end.imply('day', result.end.get('day') + 1) } return result; } }