UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

244 lines (243 loc) 9.36 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); var utils = require("../utils"); var chrono = require("chrono-node"); var consts = require("../consts"); var EntityRecognizer = (function () { function EntityRecognizer() { } EntityRecognizer.findEntity = function (entities, type) { for (var i = 0; entities && i < entities.length; i++) { if (entities[i].type == type) { return entities[i]; } } return null; }; EntityRecognizer.findAllEntities = function (entities, type) { var found = []; for (var i = 0; entities && i < entities.length; i++) { if (entities[i].type == type) { found.push(entities[i]); } } return found; }; EntityRecognizer.parseTime = function (entities) { if (typeof entities == 'string') { entities = [EntityRecognizer.recognizeTime(entities)]; } return EntityRecognizer.resolveTime(entities); }; EntityRecognizer.resolveTime = function (entities) { var _this = this; var now = new Date(); var resolvedDate; var date; var time; entities.forEach(function (entity) { if (entity.resolution) { switch (entity.resolution.resolution_type || entity.type) { case 'builtin.datetime': case 'builtin.datetime.date': case 'builtin.datetime.time': var parts = (entity.resolution.date || entity.resolution.time).split('T'); if (!date && _this.dateExp.test(parts[0])) { date = parts[0]; } if (!time && parts[1]) { time = 'T' + parts[1]; if (time == 'TMO') { time = 'T08:00:00'; } else if (time == 'TNI') { time = 'T20:00:00'; } else if (time.length == 3) { time = time + ':00:00'; } else if (time.length == 6) { time = time + ':00'; } } break; case 'chrono.duration': var duration = entity; resolvedDate = duration.resolution.start; } } }); if (!resolvedDate && (date || time)) { if (!date) { date = utils.toDate8601(now); } if (time) { date += time; } resolvedDate = new Date(date); } return resolvedDate; }; EntityRecognizer.recognizeTime = function (utterance, refDate) { var response; try { var results = chrono.parse(utterance, refDate); if (results && results.length > 0) { var duration = results[0]; response = { type: 'chrono.duration', entity: duration.text, startIndex: duration.index, endIndex: duration.index + duration.text.length, resolution: { resolution_type: 'chrono.duration', start: duration.start.date() } }; if (duration.end) { response.resolution.end = duration.end.date(); } if (duration.ref) { response.resolution.ref = duration.ref; } response.score = duration.text.length / utterance.length; } } catch (err) { console.error('Error recognizing time: ' + err.toString()); response = null; } return response; }; EntityRecognizer.parseNumber = function (entities) { var entity; if (typeof entities == 'string') { entity = { type: 'text', entity: entities.trim() }; } else { entity = EntityRecognizer.findEntity(entities, 'builtin.number'); } if (entity) { var match = this.numberExp.exec(entity.entity); if (match) { return Number(match[0]); } var oWordMatch = this.findBestMatch(this.ordinalWords, entity.entity, 1.0); if (oWordMatch) { return oWordMatch.index + 1; } } return Number.NaN; }; EntityRecognizer.parseBoolean = function (utterance, context) { utterance = utterance.trim(); if (context) { var locale = context.preferredLocale(); var pattern = context.localizer.trygettext(locale, 'yesExp', consts.Library.system); if (pattern) { EntityRecognizer.yesExp = new RegExp(pattern, 'i'); } pattern = context.localizer.trygettext(locale, 'noExp', consts.Library.system); if (pattern) { EntityRecognizer.noExp = new RegExp(pattern, 'i'); } } if (EntityRecognizer.yesExp.test(utterance)) { return true; } else if (EntityRecognizer.noExp.test(utterance)) { return false; } return undefined; }; EntityRecognizer.findBestMatch = function (choices, utterance, threshold) { if (threshold === void 0) { threshold = 0.6; } var best; var matches = EntityRecognizer.findAllMatches(choices, utterance, threshold); matches.forEach(function (value) { if (!best || value.score > best.score) { best = value; } }); return best; }; EntityRecognizer.findAllMatches = function (choices, utterance, threshold) { if (threshold === void 0) { threshold = 0.6; } var matches = []; utterance = utterance.trim().toLowerCase(); var tokens = utterance.split(' '); EntityRecognizer.expandChoices(choices).forEach(function (choice, index) { var score = 0.0; var value = choice.trim().toLowerCase(); if (value.indexOf(utterance) >= 0) { score = utterance.length / value.length; } else if (utterance.indexOf(value) >= 0) { score = Math.min(0.5 + (value.length / utterance.length), 0.9); } else { var matched = {}; tokens.forEach(function (token) { if (value.indexOf(token) >= 0) { if (!matched[token]) { matched[token] = 1; } } }); var tokenizedValue = value.split(' '); var tokenScore = 0; for (var token in matched) { tokenizedValue.forEach(function (val) { if (val.indexOf(token) >= 0 && token.length <= val.length / 2) { matched[token]--; } else if (val.indexOf(token) == -1) { } else { matched[token]++; } }); } for (var token in matched) { if (matched[token] > 0) { tokenScore += token.length; } } score = tokenScore / value.length; score = score > 1 ? 1 : score; } if (score >= threshold) { matches.push({ index: index, entity: choice, score: score }); } }); return matches; }; EntityRecognizer.expandChoices = function (choices) { if (!choices) { return []; } else if (Array.isArray(choices)) { return choices; } else if (typeof choices == 'string') { return choices.split('|'); } else if (typeof choices == 'object') { var list = []; for (var key in choices) { list.push(key); } return list; } else { return [choices.toString()]; } }; EntityRecognizer.dateExp = /^\d{4}-\d{2}-\d{2}/i; EntityRecognizer.yesExp = /^(1|y|yes|yep|sure|ok|true)(\W|$)/i; EntityRecognizer.noExp = /^(2|n|no|nope|not|false)(\W|$)/i; EntityRecognizer.numberExp = /[+-]?(?:\d+\.?\d*|\d*\.?\d+)/; EntityRecognizer.ordinalWords = 'first|second|third|fourth|fifth|sixth|seventh|eigth|ninth|tenth'; return EntityRecognizer; }()); exports.EntityRecognizer = EntityRecognizer;