UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

111 lines (88 loc) 3.53 kB
/* */ var moment = require('moment'); var Parser = require('../parser').Parser; var ParsedResult = require('../../result').ParsedResult; var util = require('../../utils/DE'); var PATTERN = new RegExp('(\\W|^)' + '(?:am\\s*?)?' + '(?:(Sonntag|Montag|Dienstag|Mittwoch|Donnerstag|Freitag|Samstag|So|Mo|Di|Mi|Do|Fr|Sa)\\s*,?\\s*)?' + '(?:den\\s*)?' + '([0-9]{1,2})\\.' + '(?:\\s*(?:bis(?:\\s*(?:am|zum))?|\\-|\\–|\\s)\\s*([0-9]{1,2})\\.)?\\s*' + '(Jan(?:uar|\\.)?|Feb(?:ruar|\\.)?|Mär(?:z|\\.)?|Maerz|Mrz\\.?|Apr(?:il|\\.)?|Mai|Jun(?:i|\\.)?|Jul(?:i|\\.)?|Aug(?:ust|\\.)?|Sep(?:t|t\\.|tember|\\.)?|Okt(?:ober|\\.)?|Nov(?:ember|\\.)?|Dez(?:ember|\\.)?)' + '(?:' + ',?\\s*([0-9]{1,4}(?![^\\s]\\d))' + '(\\s*[vn]\\.?\\s*C(?:hr)?\\.?)?' + ')?' + '(?=\\W|$)', 'i' ); var WEEKDAY_GROUP = 2; var DATE_GROUP = 3; var DATE_TO_GROUP = 4; var MONTH_NAME_GROUP = 5; var YEAR_GROUP = 6; var YEAR_BE_GROUP = 7; exports.Parser = function DEMonthNameLittleEndianParser(){ Parser.apply(this, arguments); this.pattern = function() { return PATTERN; } this.extract = function(text, ref, match, opt){ var result = new ParsedResult({ text: match[0].substr(match[1].length, match[0].length - match[1].length), index: match.index + match[1].length, ref: ref, }); var month = match[MONTH_NAME_GROUP]; month = util.MONTH_OFFSET[month.toLowerCase()]; var day = match[DATE_GROUP]; day = parseInt(day); var year = null; if (match[YEAR_GROUP]) { year = match[YEAR_GROUP]; year = parseInt(year); if(match[YEAR_BE_GROUP]){ if (/v/i.test(match[YEAR_BE_GROUP])) { // v.Chr. year = -year; } } else if (year < 100){ year = year + 2000; } } if(year){ result.start.assign('day', day); result.start.assign('month', month); result.start.assign('year', year); } else { //Find the most appropriated year var refMoment = moment(ref); refMoment.month(month - 1); refMoment.date(day); refMoment.year(moment(ref).year()); var nextYear = refMoment.clone().add(1, 'y'); var lastYear = refMoment.clone().add(-1, 'y'); if( Math.abs(nextYear.diff(moment(ref))) < Math.abs(refMoment.diff(moment(ref))) ){ refMoment = nextYear; } else if( Math.abs(lastYear.diff(moment(ref))) < Math.abs(refMoment.diff(moment(ref))) ){ refMoment = lastYear; } result.start.assign('day', day); result.start.assign('month', month); result.start.imply('year', refMoment.year()); } // Weekday component if (match[WEEKDAY_GROUP]) { var weekday = match[WEEKDAY_GROUP]; weekday = util.WEEKDAY_OFFSET[weekday.toLowerCase()] result.start.assign('weekday', weekday); } // Text can be 'range' value. Such as '12 - 13 January 2012' if (match[DATE_TO_GROUP]) { result.end = result.start.clone(); result.end.assign('day', parseInt(match[DATE_TO_GROUP])); } result.tags['DEMonthNameLittleEndianParser'] = true; return result; }; }