UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

130 lines (104 loc) 3.9 kB
/* Date format with slash "/" (also "-" and ".") between numbers - Tuesday 11/3/2015 - 11/3/2015 - 11/3 By default the paser us "middle-endien" format (US English), then fallback to little-endian if failed. - 11/3/2015 = November 3rd, 2015 - 23/4/2015 = April 23th, 2015 If "littleEndian" config is set, the parser will try the little-endian first. - 11/3/2015 = March 11th, 2015 */ var moment = require('moment'); var Parser = require('../parser').Parser; var ParsedResult = require('../../result').ParsedResult; var PATTERN = new RegExp('(\\W|^)' + '(?:' + '(?:on\\s*?)?' + '((?:sun|mon|tues?|wed(?:nes)?|thu(?:rs?)?|fri|sat(?:ur)?)(?:day)?)' + '\\s*\\,?\\s*' + ')?' + '([0-3]{0,1}[0-9]{1})[\\/\\.\\-]([0-3]{0,1}[0-9]{1})' + '(?:' + '[\\/\\.\\-]' + '([0-9]{4}\s*\,?\s*|[0-9]{2}\s*\,?\s*)' + ')?' + '(\\W|$)', 'i'); var DAYS_OFFSET = { 'sunday': 0, 'sun': 0, 'monday': 1, 'mon': 1,'tuesday': 2, 'wednesday': 3, 'wed': 3, 'thursday': 4, 'thur': 4,'friday': 5, 'fri': 5,'saturday': 6, 'sat': 6,} var OPENNING_GROUP = 1; var ENDING_GROUP = 6; var WEEKDAY_GROUP = 2; var FIRST_NUMBERS_GROUP = 3; var SECOND_NUMBERS_GROUP = 4; var YEAR_GROUP = 5; exports.Parser = function ENSlashDateFormatParser(config) { Parser.apply(this, arguments); config = config || {}; var littleEndian = config.littleEndian; var MONTH_GROUP = littleEndian ? SECOND_NUMBERS_GROUP : FIRST_NUMBERS_GROUP; var DAY_GROUP = littleEndian ? FIRST_NUMBERS_GROUP : SECOND_NUMBERS_GROUP; this.pattern = function () { return PATTERN; }; this.extract = function(text, ref, match, opt){ if(match[OPENNING_GROUP] == '/' || match[ENDING_GROUP] == '/') { // Long skip, if there is some overlapping like: // XX[/YY/ZZ] // [XX/YY/]ZZ match.index += match[0].length return; } var index = match.index + match[OPENNING_GROUP].length; var text = match[0].substr(match[OPENNING_GROUP].length, match[0].length - match[ENDING_GROUP].length); var result = new ParsedResult({ text: text, index: index, ref: ref, }); if(text.match(/^\d\.\d$/)) return; if(text.match(/^\d\.\d{1,2}\.\d{1,2}$/)) return; // MM/dd -> OK // MM.dd -> NG if(!match[YEAR_GROUP] && match[0].indexOf('/') < 0) return; var date = null; var year = match[YEAR_GROUP] || moment(ref).year() + ''; var month = match[MONTH_GROUP]; var day = match[DAY_GROUP]; month = parseInt(month); day = parseInt(day); year = parseInt(year); if(month < 1 || month > 12) { if(month > 12) { // dd/mm/yyyy date format if day looks like a month, and month // looks like a day. if (day >= 1 && day <= 12 && month >= 13 && month <= 31) { // unambiguous var tday = month; month = day; day = tday; } else { // both month and day are <= 12 return null; } } } if(day < 1 || day > 31) return null; if(year < 100){ if (year > 50) { year = year + 1900; } else { year = year + 2000; } } result.start.assign('day', day); result.start.assign('month', month); result.start.assign('year', year); //Day of week if(match[WEEKDAY_GROUP]) { result.start.assign('weekday', DAYS_OFFSET[match[WEEKDAY_GROUP].toLowerCase()]); } result.tags['ENSlashDateFormatParser'] = true; return result; }; };