UNPKG

bravey

Version:

A simple JavaScript NLP-like library to help you create your own bot.

456 lines (392 loc) 17 kB
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>JSDoc: Source: nlp/Fuzzy.js</title> <script src="scripts/prettify/prettify.js"> </script> <script src="scripts/prettify/lang-css.js"> </script> <!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]--> <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> </head> <body> <div id="main"> <h1 class="page-title">Source: nlp/Fuzzy.js</h1> <section> <article> <pre class="prettyprint source linenums"><code>/**
 * A version of the Natural Language Processing core. Doesn't follow entity order but tries to guess names by samples. Less precise but easier hits.
 * @constructor
 * @param {string} nlpName - Name of the NLP instance. Not read by this implementation.
 * @param {Object} [extensions={}] - Options handed unchanged to Bravey.DocumentClassifier.
 */
Bravey.Nlp.Fuzzy = function(nlpName, extensions) {
  extensions = extensions || {};

  // Name-keyed registries are prototype-less so that names such as
  // "constructor" or "toString" are never mistaken for registered entries.
  var intents = Object.create(null);
  var documentClassifier = new Bravey.DocumentClassifier(extensions);
  var entities = Object.create(null);
  var allEntities = [];
  var confidence = 0.75;

  /**
   * Sorts raw matches in place: by position, then longest string first,
   * then highest priority first.
   */
  function sortEntities(ent) {
    ent.sort(function(a, b) {
      if (a.position &lt; b.position) return -1;
      if (a.position > b.position) return 1;
      if (a.string.length > b.string.length) return -1;
      if (a.string.length &lt; b.string.length) return 1;
      if (a.priority > b.priority) return -1;
      if (a.priority &lt; b.priority) return 1;
      return 0;
    });
  }

  /**
   * Runs every distinct recognizer named in `ent` over `text` and returns
   * the collected matches, sorted with sortEntities.
   */
  function extractEntities(text, ent) {
    var out = [],
      done = Object.create(null);
    for (var i = 0; i &lt; ent.length; i++) {
      if (!done[ent[i].entity]) {
        done[ent[i].entity] = 1;
        entities[ent[i].entity].getEntities(text, out);
      }
    }
    sortEntities(out);
    return out;
  }

  /** Creates the accumulator shared by getEntities and getAnyEntity. */
  function newParse() {
    return {
      pos: -1, // end offset of the last accepted match, -1 when none yet
      text: "",
      entities: [],
      entitiesIndex: {},
      sentence: []
    };
  }

  /** Appends a plain-text chunk to the parse; empty chunks add no sentence item. */
  function pushText(parse, string) {
    if (string.length) parse.sentence.push({
      string: string
    });
    parse.text += string;
  }

  /** Records an accepted match, preceded by the plain text separating it from the previous one. */
  function pushMatch(parse, text, raw, id) {
    var match = {
      position: raw.position,
      entity: raw.entity,
      value: raw.value,
      string: raw.string,
      id: id
    };
    parse.entities.push(match);
    parse.entitiesIndex[id] = match;
    pushText(parse, text.substring(parse.pos === -1 ? 0 : parse.pos, raw.position));
    parse.sentence.push(match);
    parse.text += "{" + raw.entity + "}";
    parse.pos = raw.position + raw.string.length;
  }

  /**
   * Claims the next slot of entity type `ent` in `intent` for the current
   * sentence. When the intent has no ID for that slot yet, one is created
   * (`name` when given, otherwise the entity name plus a progressive suffix)
   * and the intent is extended with it.
   */
  function claimIntentEntity(intent, ent, counters, name) {
    if (!intent.index[ent]) intent.index[ent] = [];
    if (counters[ent] === undefined) counters[ent] = 0;
    else counters[ent]++;
    var slot = counters[ent];
    if (!intent.index[ent][slot]) {
      var id = name ? name : ent + (slot ? slot : "");
      intent.index[ent][slot] = id;
      intent.entities.push({
        entity: ent,
        id: id
      });
      console.warn("Adding entity", id, "to", intent.name);
    }
  }

  /**
   * Matches the entities expected by `intent` inside `text`.
   * Returns the tagged text, the accepted matches and the flags checked by test().
   */
  function getEntities(text, intent) {
    var out = extractEntities(text, intent.entities),
      parse = newParse(),
      counters = Object.create(null),
      exceedEntities = false,
      extraEntities = false,
      found = 0,
      ent, ids, nextid;

    for (var i = 0; i &lt; out.length; i++) {
      // Skip matches overlapping the previously accepted one.
      if (out[i].position &lt; parse.pos) continue;
      ent = out[i].entity;
      ids = intent.index[ent];
      if (!ids) {
        extraEntities = true;
        continue;
      }
      if (counters[ent] === undefined) counters[ent] = 0;
      nextid = ids[counters[ent]];
      if (!nextid) {
        // More occurrences of this entity type than the intent declares.
        exceedEntities = true;
        continue;
      }
      counters[ent]++;
      found++;
      pushMatch(parse, text, out[i], nextid);
    }
    pushText(parse, text.substring(parse.pos === -1 ? 0 : parse.pos));

    return {
      found: found,
      // Never raised here: this implementation accepts sentences with fewer entities than declared.
      missingEntities: false,
      exceedEntities: exceedEntities,
      extraEntities: extraEntities,
      text: parse.text,
      entities: parse.entities,
      sentence: parse.sentence,
      entitiesIndex: parse.entitiesIndex
    };
  }

  /**
   * Tags every entity any registered recognizer finds in `text`, extending
   * `intent` with the ones it does not declare yet. `names`, when given,
   * supplies IDs by position of the entity in the sentence.
   */
  function expandIntentFromText(text, intent, names) {
    var out = extractEntities(text, allEntities),
      counters = Object.create(null),
      outtext = "",
      cur = -1,
      pos = -1,
      ent;

    for (var i = 0; i &lt; out.length; i++) {
      // NOTE(review): getEntities accepts a match starting exactly at `pos` (>=),
      // this one requires it to start after `pos` - confirm which is intended.
      if (out[i].position &lt;= pos) continue;
      ent = out[i].entity;
      cur++;
      claimIntentEntity(intent, ent, counters, names &amp;&amp; names[cur]);
      outtext += text.substring(pos === -1 ? 0 : pos, out[i].position);
      outtext += "{" + ent + "}";
      pos = out[i].position + out[i].string.length;
    }
    outtext += text.substring(pos === -1 ? 0 : pos);

    return {
      text: outtext
    };
  }

  /**
   * Extends `intent` with the entities named by the {tags} of an already
   * tagged sentence. The text itself is returned unchanged.
   */
  function expandIntentFromTagged(text, intent, names) {
    var tag = /\{([.a-z_-]+)\}/g,
      counters = Object.create(null),
      cur = -1,
      found;

    while ((found = tag.exec(text)) !== null) {
      cur++;
      claimIntentEntity(intent, found[1], counters, names &amp;&amp; names[cur]);
    }

    return {
      text: text
    };
  }

  /**
   * Tags every entity any registered recognizer finds in `text`, regardless
   * of intents. IDs are the entity name plus a progressive suffix.
   */
  function getAnyEntity(text) {
    var out = extractEntities(text, allEntities),
      parse = newParse(),
      counters = Object.create(null),
      found = 0,
      ent;

    for (var i = 0; i &lt; out.length; i++) {
      // NOTE(review): strict comparison here, while getEntities uses >= - confirm which is intended.
      if (out[i].position &lt;= parse.pos) continue;
      ent = out[i].entity;
      found++;
      if (counters[ent] === undefined) counters[ent] = 0;
      else counters[ent]++;
      pushMatch(parse, text, out[i], ent + (counters[ent] ? counters[ent] : ""));
    }
    pushText(parse, text.substring(parse.pos === -1 ? 0 : parse.pos));

    return {
      found: found,
      text: parse.text,
      entities: parse.entities,
      sentence: parse.sentence,
      entitiesIndex: parse.entitiesIndex
    };
  }

  /**
   * Adds an intent.
   * @param {string} intentName - The name of the new intent.
   * @param {IntentEntity[]} entities - The produced entities.
   * @returns {boolean} True when successful.
   */
  this.addIntent = function(intentName, entities) {
    // Work on a copy: auto-expansion appends to this list and must not
    // modify the array owned by the caller.
    var list = (entities || []).slice();
    var index = Object.create(null);
    for (var i = 0; i &lt; list.length; i++) {
      if (!index[list[i].entity]) index[list[i].entity] = [];
      index[list[i].entity].push(list[i].id);
    }
    intents[intentName] = {
      name: intentName,
      entities: list,
      index: index
    };
    return true;
  };

  /**
   * Adds an entity recognizer.
   * @param {EntityRecognizer} entity - The entity recognizer to be added.
   * @returns {boolean} True when successful.
   */
  this.addEntity = function(entity) {
    var entityName = entity.getName();
    if (!entities[entityName]) allEntities.push({
      entity: entityName,
      id: "none"
    });
    entities[entityName] = entity;
    return true;
  };

  /**
   * Check if an entity with a given name exists.
   * @param {string} entityName - The entity name.
   * @returns {boolean} True when found.
   */
  this.hasEntity = function(entityName) {
    return !!entities[entityName];
  };

  /**
   * Set confidence ratio, from 0.5 to 1. The higher the more strict.
   * @param {number} ratio - The confidence ratio to be set.
   */
  this.setConfidence = function(ratio) {
    confidence = ratio;
  };

  /**
   * Returns the confidence ratio.
   * @returns {number} The confidence ratio.
   */
  this.getConfidence = function() {
    return confidence;
  };

  /**
   * Add a new document.
   * @param {string} text - The document content.
   * @param {string} intent - The related intent.
   * @param {Object} [guess] - Guessing options. When omitted, the text is linked as-is to an already existing intent.
   * @param {boolean} [guess.fromFullSentence=false] - Indicates that the document is a full untagged sentence. (i.e. "Please call the 333-123456")
   * @param {boolean} [guess.fromTaggedSentence=false] - Indicates that the document is tagged with braces. (i.e "Please call the {phone_number}")
   * @param {boolean} [guess.expandIntent=false] - Extends or creates a new intent if exceeded entities are found. New entities id will be autogenerated and progressive. Guessing only works when this is set.
   * @param {string[]} [guess.withNames=[]] - Uses the given names for auto-extending intents. Positions in the array are matched with the positions of the entities in the specified sentence.
   * @returns {boolean} False when the intent is unknown or the guess options can't be applied; otherwise the value returned by the document classifier.
   */
  this.addDocument = function(text, intent, guess) {
    if (!guess) {
      // Link a marked sentence to a particular intent
      if (intents[intent]) return documentClassifier.addDocument(Bravey.Text.clean(text), intent);
      console.warn("Can't find intent", intent);
      return false;
    }

    var expand = null;
    if (guess.fromFullSentence) {
      // From a full sentence...
      text = Bravey.Text.clean(text);
      expand = expandIntentFromText;
    } else if (guess.fromTaggedSentence) {
      // From a {tagged} sentence...
      expand = expandIntentFromTagged;
    }

    if (expand &amp;&amp; guess.expandIntent) {
      // Expand intent with found items
      if (!intents[intent]) {
        console.warn("Adding intent", intent);
        this.addIntent(intent, []);
      }
      var expanded = expand(text, intents[intent], guess.withNames);
      return documentClassifier.addDocument(expanded.text, intent);
    }

    console.warn("Can't guess...");
    return false;
  };

  /**
   * Check if a given sentence matches an intent and extracts its entities.
   * @param {string} text - The sentence to be processed.
   * @param {string} [method="default"] - The extraction method. "anyEntity" extracts all found entities and guesses the intent, regardless of the intents structure.
   * @returns {NlpResult} When an intent is found.
   * @returns {false} When the sentence doesn't match any intent.
   */
  this.test = function(text, method) {
    var classification, result;
    text = Bravey.Text.clean(text);

    if (method === "anyEntity") {
      result = getAnyEntity(text);
      classification = documentClassifier.classifyDocument(result.text);
      result.score = classification.winner.score;
      result.intent = classification.winner.label;
      return result;
    }

    // When entities are enough, check classifier.
    var entlist, score, resultscore = -1,
      resultfound = -1;
    result = false;
    for (var intent in intents) {
      entlist = getEntities(text, intents[intent]);
      // No unwanted entities, entity count under the threshold and 0 entities for no entities intents
      if (entlist.exceedEntities || entlist.extraEntities || entlist.missingEntities) continue;
      classification = documentClassifier.classifyDocument(entlist.text);
      score = classification.scores[intent];
      // Best score wins; on equal score the candidate with more matched entities wins.
      if ((score > confidence) &amp;&amp; ((score > resultscore) || ((score == resultscore) &amp;&amp; (entlist.found > resultfound)))) {
        result = entlist;
        result.score = resultscore = score;
        resultfound = result.found;
        result.intent = intent;
      }
    }
    return result;
  };

};

/**
 Describes an entity to be matched in an intent.
 @typedef IntentEntity
 @type {Object}
 @property {string} entity The entity type to be found.
 @property {string} id The entity ID to be assigned when found.
*/

/**
 Describes a sentence classification and entities.
 @typedef NlpResult
 @type {Object}
 @property {Entity[]} entities The ordered list of found entities.
 @property {Object} entitiesIndex A map version of entities, with the entity ID as key and the found entity as value.
 @property {string} intent The matched intent.
 @property {number} score The score of the matched sentence intent.
*/</code></pre> </article> </section> </div> <nav> <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="Bravey.ApiAiAdapter.html">ApiAiAdapter</a></li><li><a href="Bravey.ContextManager.html">ContextManager</a></li><li><a href="Bravey.DocumentClassifier.html">DocumentClassifier</a></li><li><a href="Bravey.EMailEntityRecognizer.html">EMailEntityRecognizer</a></li><li><a href="Bravey.Filter.BasicFilter.html">BasicFilter</a></li><li><a href="Bravey.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.EN.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.EN.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.EN.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.EN.Stemmer.html">Stemmer</a></li><li><a href="Bravey.Language.EN.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.EN.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Language.IT.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.IT.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.IT.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.IT.Stemmer.html">Stemmer</a></li><li><a href="Bravey.Language.IT.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.IT.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Language.PT.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.PT.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.PT.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.PT.Stemmer.html">Stemmer</a></li><li><a
href="Bravey.Language.PT.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.PT.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Nlp.Fuzzy.html">Fuzzy</a></li><li><a href="Bravey.Nlp.Sequential.html">Sequential</a></li><li><a href="Bravey.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.RegexEntityRecognizer.html">RegexEntityRecognizer</a></li><li><a href="Bravey.SessionManager.InMemorySessionManager.html">InMemorySessionManager</a></li><li><a href="Bravey.StringEntityRecognizer.html">StringEntityRecognizer</a></li><li><a href="Bravey.Text.RegexMap.html">RegexMap</a></li></ul><h3>Namespaces</h3><ul><li><a href="Bravey.html">Bravey</a></li><li><a href="Bravey.Data.html">Data</a></li><li><a href="Bravey.Date.html">Date</a></li><li><a href="Bravey.File.html">File</a></li><li><a href="Bravey.Filter.html">Filter</a></li><li><a href="Bravey.Language.html">Language</a></li><li><a href="Bravey.Language.EN.html">EN</a></li><li><a href="Bravey.Language.IT.html">IT</a></li><li><a href="Bravey.Language.PT.html">PT</a></li><li><a href="Bravey.Nlp.html">Nlp</a></li><li><a href="Bravey.SessionManager.html">SessionManager</a></li><li><a href="Bravey.Text.html">Text</a></li></ul><h3><a href="global.html">Global</a></h3> </nav> <br class="clear"> <footer> Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.4.3</a> </footer> <script> prettyPrint(); </script> <script src="scripts/linenumber.js"> </script> </body> </html>