node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
139 lines (103 loc) • 4.71 kB
JavaScript
/*
*/
var ParsedComponents = require('../../result').ParsedComponents;
var Refiner = require('../refiner').Refiner;
// Text permitted between two results for isAbleToMerge to accept them:
// optional whitespace around at most one connector out of
// "T", "at", "after", "before", "on", "of", "," or "-" (case-sensitive).
var PATTERN = /^\s*(T|at|after|before|on|of|,|-)?\s*$/;
/**
 * Reports whether a parsed result lacks a certain hour.
 *
 * @param {Object} result - Parsed result whose `start` component exposes `isCertain(name)`.
 * @returns {boolean} `true` when `result.start.isCertain('hour')` is falsy.
 */
var isDateOnly = exports.isDateOnly = function(result) {
    var hourIsCertain = result.start.isCertain('hour');
    return !hourIsCertain;
};
/**
 * Reports whether a parsed result has neither a certain month nor a certain weekday.
 *
 * @param {Object} result - Parsed result whose `start` component exposes `isCertain(name)`.
 * @returns {boolean} `true` when neither 'month' nor 'weekday' is certain on `result.start`.
 */
var isTimeOnly = exports.isTimeOnly = function(result) {
    var start = result.start;
    // 'weekday' is only queried when 'month' is not certain.
    return !(start.isCertain('month') || start.isCertain('weekday'));
};
/**
 * Decides whether two consecutive parsed results are separated only by text
 * that PATTERN accepts (whitespace and at most one connector word/symbol).
 *
 * @param {string} text - The full input text both results were extracted from.
 * @param {Object} prevResult - Earlier result; its `index` and `text` locate where it ends.
 * @param {Object} curResult - Later result; its `index` locates where it starts.
 * @returns {Array|null} The match array of the in-between text against PATTERN
 *     (truthy) when the results may be merged, otherwise `null`.
 */
var isAbleToMerge = exports.isAbleToMerge = function(text, prevResult, curResult) {
    var prevEnd = prevResult.index + prevResult.text.length;

    // String.prototype.substring swaps its arguments when start > end, so
    // without this guard two overlapping results would be judged on the
    // overlapped text itself rather than on the text between them.
    if (curResult.index < prevEnd) {
        return null;
    }

    var textBetween = text.substring(prevEnd, curResult.index);
    return textBetween.match(PATTERN);
};
/**
 * Builds a new component from a clone of `dateComponent`, copying the time
 * fields of `timeComponent` into it. Neither argument is written to directly;
 * all writes go to the clone.
 *
 * - 'hour', 'minute' and 'second' are written with assign() when the time
 *   component's 'hour' is certain, otherwise with imply().
 * - 'meridiem' is assigned when certain on the time component; otherwise it is
 *   implied only if the time component has a value and the clone has none.
 * - When the resulting 'meridiem' equals 1 and the hour is below 12, 12 is
 *   added to the hour (presumably 1 denotes PM - confirm against the
 *   component implementation).
 *
 * @param {Object} dateComponent - Component exposing clone(); the clone must
 *     expose get(), assign() and imply().
 * @param {Object} timeComponent - Component exposing isCertain() and get().
 * @returns {Object} The populated clone of `dateComponent`.
 */
var mergeDateTimeComponent = exports.mergeDateTimeComponent = function(dateComponent, timeComponent) {
    var merged = dateComponent.clone();

    // Writes with assign() when `certain` is truthy, otherwise with imply().
    function store(certain, field, value) {
        if (certain) {
            merged.assign(field, value);
        } else {
            merged.imply(field, value);
        }
    }

    var hourCertain = timeComponent.isCertain('hour');
    var clockFields = ['hour', 'minute', 'second'];
    for (var k = 0; k < clockFields.length; k++) {
        store(hourCertain, clockFields[k], timeComponent.get(clockFields[k]));
    }

    if (timeComponent.isCertain('meridiem')) {
        merged.assign('meridiem', timeComponent.get('meridiem'));
    } else {
        var timeMeridiem = timeComponent.get('meridiem');
        if (timeMeridiem !== undefined && merged.get('meridiem') === undefined) {
            merged.imply('meridiem', timeMeridiem);
        }
    }

    var needsShift = merged.get('meridiem') == 1 && merged.get('hour') < 12;
    if (needsShift) {
        store(timeComponent.isCertain('hour'), 'hour', merged.get('hour') + 12);
    }

    return merged;
};
/**
 * Folds a time result into a date result and returns the (mutated) date result.
 *
 * `dateResult` is updated in place: its `start` (and `end`, when either input
 * has one) become merged date-time components, its `index`/`text` are widened
 * to span both results within `text`, and every tag key of `timeResult` plus
 * 'ENMergeDateAndTimeRefiner' is set to `true` on its `tags`.
 *
 * @param {string} text - The full input text both results were extracted from.
 * @param {Object} dateResult - Result supplying the date part; mutated and returned.
 * @param {Object} timeResult - Result supplying the time part.
 * @returns {Object} `dateResult`, after the merge.
 */
function mergeResult(text, dateResult, timeResult) {
    var mergedStart = mergeDateTimeComponent(dateResult.start, timeResult.start);

    var hasRange = dateResult.end != null || timeResult.end != null;
    if (hasRange) {
        // A side without an end falls back to its own start.
        var endDatePart = dateResult.end == null ? dateResult.start : dateResult.end;
        var endTimePart = timeResult.end == null ? timeResult.start : timeResult.end;
        var mergedEnd = mergeDateTimeComponent(endDatePart, endTimePart);

        // Ex. 9pm - 1am: with no explicit end date, an end earlier than the
        // start is moved forward by one day.
        // NOTE(review): day + 1 is not normalized here; presumably date()
        // copes with a day past the end of the month - confirm.
        if (dateResult.end == null && mergedEnd.date().getTime() < mergedStart.date().getTime()) {
            var dayWriter = mergedEnd.isCertain('day') ? 'assign' : 'imply';
            mergedEnd[dayWriter]('day', mergedEnd.get('day') + 1);
        }

        dateResult.end = mergedEnd;
    }
    dateResult.start = mergedStart;

    // Widen the covered span so it includes both original results.
    var dateStop = dateResult.index + dateResult.text.length;
    var timeStop = timeResult.index + timeResult.text.length;
    var spanStart = Math.min(dateResult.index, timeResult.index);
    var spanStop = Math.max(dateStop, timeStop);
    dateResult.index = spanStart;
    dateResult.text = text.substring(spanStart, spanStop);

    for (var tagName in timeResult.tags) {
        dateResult.tags[tagName] = true;
    }
    dateResult.tags['ENMergeDateAndTimeRefiner'] = true;

    return dateResult;
}
exports.Refiner = function ENMergeDateTimeRefiner() {
Refiner.call(this);
this.refine = function(text, results, opt) {
if (results.length < 2) return results;
var mergedResult = [];
var currResult = null;
var prevResult = null;
for (var i = 1; i < results.length; i++) {
currResult = results[i];
prevResult = results[i-1];
if (isDateOnly(prevResult) && isTimeOnly(currResult)
&& isAbleToMerge(text, prevResult, currResult)) {
prevResult = mergeResult(text, prevResult, currResult);
currResult = results[i + 1];
i += 1;
} else if (isDateOnly(currResult) && isTimeOnly(prevResult)
&& isAbleToMerge(text, prevResult, currResult)) {
prevResult = mergeResult(text, currResult, prevResult);
currResult = results[i + 1];
i += 1;
}
mergedResult.push(prevResult);
}
if (currResult != null) {
mergedResult.push(currResult);
}
return mergedResult;
}
}