UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

137 lines (113 loc) 4.52 kB
/* The parser for parsing US's date format that begin with month's name. EX. - January 13 - January 13, 2012 - January 13 - 15, 2012 - Tuesday, January 13, 2012 Watch out for: - January 12:00 - January 12.44 - January 1222344 */ var moment = require('moment'); var Parser = require('../parser').Parser; var ParsedResult = require('../../result').ParsedResult; var util = require('../../utils/EN'); var PATTERN = new RegExp('(\\W|^)' + '(?:' + '(?:on\\s*?)?' + '(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sun\\.?|Mon\\.?|Tue\\.?|Wed\\.?|Thu\\.?|Fri\\.?|Sat\\.?)' + '\\s*,?\\s*)?' + '(Jan\\.?|January|Feb\\.?|February|Mar\\.?|March|Apr\\.?|April|May\\.?|Jun\\.?|June|Jul\\.?|July|Aug\\.?|August|Sep\\.?|Sept\\.?|September|Oct\\.?|October|Nov\\.?|November|Dec\\.?|December)' + '(?:-|\/|\\s*,?\\s*)' + '(([0-9]{1,2})(?:st|nd|rd|th)?|' + util.ORDINAL_WORDS_PATTERN +')\\s*' + '(?:' + '(?:to|\\-)\\s*' + '(([0-9]{1,2})(?:st|nd|rd|th)?| ' + util.ORDINAL_WORDS_PATTERN + ')\\s*' + ')?' + '(?:' + '(?:-|\/|\\s*,?\\s*)' + '(?:([0-9]{4})\\s*(BE|AD|BC)?|([0-9]{1,4})\\s*(AD|BC))\\s*' + ')?' + '(?=\\W|$)(?!\\:\\d)', 'i'); var WEEKDAY_GROUP = 2; var MONTH_NAME_GROUP = 3; var DATE_GROUP = 4; var DATE_NUM_GROUP = 5; var DATE_TO_GROUP = 6; var DATE_TO_NUM_GROUP = 7; var YEAR_GROUP = 8; var YEAR_BE_GROUP = 9; var YEAR_GROUP2 = 10; var YEAR_BE_GROUP2 = 11; exports.Parser = function ENMonthNameMiddleEndianParser(){ Parser.apply(this, arguments); this.pattern = function() { return PATTERN; } this.extract = function(text, ref, match, opt){ var result = new ParsedResult({ text: match[0].substr(match[1].length, match[0].length - match[1].length), index: match.index + match[1].length, ref: ref, }); var month = match[MONTH_NAME_GROUP]; month = util.MONTH_OFFSET[month.toLowerCase()]; var day = match[DATE_NUM_GROUP] ? parseInt(match[DATE_NUM_GROUP]) : util.ORDINAL_WORDS[match[DATE_GROUP].trim().replace('-', ' ').toLowerCase()]; var year = null; if (match[YEAR_GROUP] || match[YEAR_GROUP2]) { year = match[YEAR_GROUP] || match[YEAR_GROUP2]; year = parseInt(year); var yearBE = match[YEAR_BE_GROUP] || match[YEAR_BE_GROUP2]; if (yearBE) { if (/BE/i.test(yearBE)) { // Buddhist Era year = year - 543; } else if (/BC/i.test(yearBE)) { // Before Christ year = -year; } } else if (year < 100){ year = year + 2000; } } if(year){ result.start.assign('day', day); result.start.assign('month', month); result.start.assign('year', year); } else { //Find the most appropriated year var refMoment = moment(ref); refMoment.month(month - 1); refMoment.date(day); var nextYear = refMoment.clone().add(1, 'y'); var lastYear = refMoment.clone().add(-1, 'y'); if( Math.abs(nextYear.diff(moment(ref))) < Math.abs(refMoment.diff(moment(ref))) ){ refMoment = nextYear; } else if( Math.abs(lastYear.diff(moment(ref))) < Math.abs(refMoment.diff(moment(ref))) ){ refMoment = lastYear; } result.start.assign('day', day); result.start.assign('month', month); result.start.imply('year', refMoment.year()); } // Weekday component if (match[WEEKDAY_GROUP]) { var weekday = match[WEEKDAY_GROUP]; weekday = util.WEEKDAY_OFFSET[weekday.toLowerCase()] result.start.assign('weekday', weekday); } // Text can be 'range' value. Such as 'January 12 - 13, 2012' if (match[DATE_TO_GROUP]) { var endDate = match[DATE_TO_NUM_GROUP] ? endDate = parseInt(match[DATE_TO_NUM_GROUP]) : util.ORDINAL_WORDS[match[DATE_TO_GROUP].replace('-', ' ').trim().toLowerCase()]; result.end = result.start.clone(); result.end.assign('day', endDate); } result.tags['ENMonthNameMiddleEndianParser'] = true; return result; } };