UNPKG

bravey

Version:

A simple JavaScript NLP-like library to help you create your own bot.

447 lines (378 loc) 16.6 kB
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>JSDoc: Source: nlp/Sequential.js</title> <script src="scripts/prettify/prettify.js"> </script> <script src="scripts/prettify/lang-css.js"> </script> <!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]--> <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> </head> <body> <div id="main"> <h1 class="page-title">Source: nlp/Sequential.js</h1> <section> <article> <pre class="prettyprint source linenums"><code>/** * A version of the Natural Language Processing core. It processes entities in strict sequential order. More precise but harder hits. * @constructor */ Bravey.Nlp.Sequential = function(nlpName, extensions) { extensions = extensions || {}; var intents = {}; var documentClassifier = new Bravey.DocumentClassifier(extensions); var entities = {}; var allEntities = []; var confidence = 0.75; function getIntentRoot(intentName) { return intentName.split(/~/)[0]; } function sortEntities(ent) { ent.sort(function(a, b) { if (a.position &lt; b.position) return -1; if (a.position > b.position) return 1; if (a.string.length > b.string.length) return -1; if (a.string.length &lt; b.string.length) return 1; if (a.priority > b.priority) return -1; if (a.priority &lt; b.priority) return 1; return 0; }); } function extractEntities(text, ent) { var out = [], done = {}; for (var i = 0; i &lt; ent.length; i++) if (!done[ent[i].entity]) { done[ent[i].entity] = 1; entities[ent[i].entity].getEntities(text, out); } sortEntities(out); return out; } function guessIntent(text, root, names) { var intentname = root || "", nextid, ent, outtext = "", entities = [], counters = {}, cur = 0, pos = -1; var out = extractEntities(text, allEntities); for (var i = 0; i &lt; out.length; i++) { ent = out[i].entity; if (out[i].position >= pos) { intentname += "~" + ent; if (counters[ent] == 
undefined) counters[ent] = 0; else counters[ent]++; nextid = names &amp;&amp; names[cur] ? names[cur] : ent + (counters[ent] ? counters[ent] : ""); entities.push({ entity: ent, id: nextid }); if (pos == -1) outtext += text.substr(0, out[i].position); else outtext += text.substr(pos, out[i].position - pos); outtext += "{" + ent + "}"; pos = out[i].position + out[i].string.length; cur++; } } outtext += text.substr(pos == -1 ? 0 : pos); return { error: false, text: outtext, name: intentname, entities: entities } } function guessIntentFromTagged(text, root, names) { var nextid, cur = -1, counters = {}; var intentname = root || "", outentities = [], counters = {}, cur = 0, error = false; text.replace(/\{([.a-z_-]+)\}/g, function(m, ent) { if (!entities[ent]) error = ent; else { intentname += "~" + ent; if (counters[ent] == undefined) counters[ent] = 0; else counters[ent]++; nextid = names &amp;&amp; names[cur] ? names[cur] : ent + (counters[ent] ? counters[ent] : ""); outentities.push({ entity: ent, id: nextid }); cur++; } }); return { error: error, text: text, name: intentname, entities: outentities }; } function getEntities(text, intent) { var out = extractEntities(text, intent.entities); var outentities = [], outentitiesindex = {}, missingEntities = false, ent, pos = -1, entitypos = 0; function forward() { // @TODO: This part can be improved. while (intent.entities[entitypos] &amp;&amp; entities[intent.entities[entitypos].entity].expand) { var match = { position: pos &lt; 0 ? 0 : pos, entity: intent.entities[entitypos].entity, value: "", string: text.substr(pos &lt; 0 ? 
0 : pos), id: intent.entities[entitypos].id }; outentitiesindex[match.id] = match; outentities.push(match); entitypos++; } } forward(); if (out.length) for (var i = 0; i &lt; out.length; i++) { ent = out[i].entity; if (out[i].position >= pos) { if (intent.entities[entitypos].entity == ent) { var match = { position: out[i].position, entity: ent, value: out[i].value, string: out[i].string, id: intent.entities[entitypos].id }; outentities.push(match); outentitiesindex[match.id] = match; pos = out[i].position + out[i].string.length; entitypos++; forward(); if (entitypos >= intent.entities.length) break; } } } else missingEntities = intent.entities.length; var nextentity, outtext = "", prevstring = "", pos = 0, sentence = [], oldposition; for (var i = 0; i &lt; outentities.length; i++) { if (entities[outentities[i].entity].expand) { nextentity = outentities[i + 1]; if (nextentity) outentities[i].string = outentities[i].string.substr(0, nextentity.position - outentities[i].position); oldposition = outentities[i].position; entities[outentities[i].entity].expand(outentities[i]); if (entities[outentities[i].entity].position > oldposition) { prevstring = text.substr(pos, entities[outentities[i].entity].position - oldposition); sentence.push({ string: prevstring }); outtext += prevstring; pos += entities[outentities[i].entity].position - oldposition; } } prevstring = text.substr(pos, outentities[i].position - pos); if (prevstring) { sentence.push({ string: prevstring }); outtext += prevstring; } sentence.push(outentities[i]); outtext += "{" + outentities[i].entity + "}"; pos = outentities[i].position + outentities[i].string.length; } if (prevstring = text.substr(pos)) { sentence.push({ string: prevstring }); outtext += prevstring; } return { found: outentities.length, exceedEntities: false, missingEntities: outentities.length &lt; intent.entities.length, extraEntities: outentities.length > intent.entities.length, text: outtext, entities: outentities, sentence: sentence, 
entitiesIndex: outentitiesindex }; } /** * Adds an intent. * @param {string} intentName - The name of the new intent. * @param {IntentEntity[]} entities - The produced entities. * @returns {boolean} True when successful. */ this.addIntent = function(intentName, entities) { var index = {}, intentFullName = intentName; for (var i = 0; i &lt; entities.length; i++) { intentFullName += "~" + entities[i].entity; if (!index[entities[i].entity]) index[entities[i].entity] = []; index[entities[i].entity].push(entities[i].id); } intents[intentFullName] = { name: intentFullName, root: intentName, entities: entities, index: index }; return true; } /** * Adds an entity recognizer. * @param {EntityRecognizer} entity - The entity recognizer to be added. * @returns {boolean} True when successful. */ this.addEntity = function(entity) { var entityName = entity.getName(); if (!entities[entityName]) allEntities.push({ entity: entityName, id: "none" }); entities[entityName] = entity; return true; } /** * Check if an entity with a given name exists. * @param {string} entityName - The entity name. * @returns {boolean} True when found. */ this.hasEntity = function(entityName) { return !!entities[entityName]; } /** * Set confidence ratio, from 0.5 to 1. The higher the more strict. * @param {number} ratio - The confidence ratio to be set. */ this.setConfidence = function(ratio) { confidence = ratio; } /** * Returns the confidence ratio. * @returns {number} The confidence ratio. */ this.getConfidence = function(c) { return confidence; } /** * Add a new document. * @param {string} text - The document content. * @param {string} intent - The related intent. If not guessing, must append entity types separated by ~. (i.e. "call~phone_number") * @param {boolean} [guess.fromFullSentence=false] - Indicates that the document is a full untagged sentence. (i.e. "Please call the 333-123456") * @param {boolean} [guess.fromTaggedSentence=true] - Indicates that the document is tagged with braces. 
(i.e. "Please call the {phone_number}") * @param {boolean} [guess.expandIntent=false] - Extends or creates a new intent if exceeding entities are found. New entity IDs will be autogenerated and progressive. * @param {string[]} [guess.withNames=[]] - Uses the given names for auto-extending intents. Positions in the array are matched with the positions of the entities in the specified sentence. * @returns {boolean} True when found. */ this.addDocument = function(text, intent, guess) { if (guess) { if (guess.fromFullSentence) { // From a full sentence... text = Bravey.Text.clean(text); if (guess.expandIntent) { // Expand intent with found items var found = guessIntent(text, intent, guess.withNames); if (!intents[found.name]) { console.warn("Adding intent", found.name); this.addIntent(intent, found.entities); } return documentClassifier.addDocument(found.text, intent); } } else if (guess.fromTaggedSentence) { // From a {tagged} sentence... var found = guessIntentFromTagged(text, intent, guess.withNames); if (found.error !== false) { console.warn("Can't find entity typed", found.error); return false; } else { if (!intents[found.name]) { console.warn("Adding intent", found.name); this.addIntent(intent, found.entities); } return documentClassifier.addDocument(found.text, intent); } } console.warn("Can't guess..."); return false; } else { // Link a marked sentence to a particular intent if (intents[intent]) return documentClassifier.addDocument(Bravey.Text.clean(text), getIntentRoot(intent)); else { console.warn("Can't find intent", intent); return false; } } } /** * Check if a given sentence matches an intent and extracts its entities. * @param {string} text - The sentence to be processed. * @param {string} [method="default"] - The extraction method. * @returns {NlpResult} When an intent is found. * @returns {false} When the sentence doesn't match any intent. 
*/ this.test = function(text, method) { text = Bravey.Text.clean(text); switch (method) { default: { // When entities are enough, check classifier. var score, classification, entlist, result = false, resultscore = -1, resultfound = -1; for (var intent in intents) { entlist = getEntities(text, intents[intent]); if (!entlist.exceedEntities &amp;&amp; !entlist.extraEntities &amp;&amp; !entlist.missingEntities) { // No unwanted entities, entity count under the threshold and 0 entities for no entities intents classification = documentClassifier.classifyDocument(entlist.text); score = classification.scores[intents[intent].root]; if ((score > confidence) &amp;&amp; ((score > resultscore) || (entlist.found > resultfound))) { result = entlist; result.score = resultscore = score; resultfound = result.found; result.intent = intents[intent].root; } } } return result; } } return false; } } /** Describes an entity to be matched in an intent. @typedef IntentEntity @type {Object} @property {string} entity The entity type to be found. @property {string} id The entity ID to be assigned when found. */ /** Describes a sentence classification and entities. @typedef NlpResult @type {Object} @property {Entity[]} entities The ordered list of found entities. @property {number[]} entitiesIndex A map version of entities, with key as entity ID and value as entity value. @property {string} intent The matched intent. @property {number} score The score of the matched sentence intent. 
*/</code></pre> </article> </section> </div> <nav> <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="Bravey.ApiAiAdapter.html">ApiAiAdapter</a></li><li><a href="Bravey.ContextManager.html">ContextManager</a></li><li><a href="Bravey.DocumentClassifier.html">DocumentClassifier</a></li><li><a href="Bravey.EMailEntityRecognizer.html">EMailEntityRecognizer</a></li><li><a href="Bravey.Filter.BasicFilter.html">BasicFilter</a></li><li><a href="Bravey.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.EN.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.EN.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.EN.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.EN.Stemmer.html">Stemmer</a></li><li><a href="Bravey.Language.EN.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.EN.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Language.IT.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.IT.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.IT.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.IT.Stemmer.html">Stemmer</a></li><li><a href="Bravey.Language.IT.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.IT.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Language.PT.DateEntityRecognizer.html">DateEntityRecognizer</a></li><li><a href="Bravey.Language.PT.FreeTextEntityRecognizer.html">FreeTextEntityRecognizer</a></li><li><a href="Bravey.Language.PT.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.Language.PT.Stemmer.html">Stemmer</a></li><li><a 
href="Bravey.Language.PT.TimeEntityRecognizer.html">TimeEntityRecognizer</a></li><li><a href="Bravey.Language.PT.TimePeriodEntityRecognizer.html">TimePeriodEntityRecognizer</a></li><li><a href="Bravey.Nlp.Fuzzy.html">Fuzzy</a></li><li><a href="Bravey.Nlp.Sequential.html">Sequential</a></li><li><a href="Bravey.NumberEntityRecognizer.html">NumberEntityRecognizer</a></li><li><a href="Bravey.RegexEntityRecognizer.html">RegexEntityRecognizer</a></li><li><a href="Bravey.SessionManager.InMemorySessionManager.html">InMemorySessionManager</a></li><li><a href="Bravey.StringEntityRecognizer.html">StringEntityRecognizer</a></li><li><a href="Bravey.Text.RegexMap.html">RegexMap</a></li></ul><h3>Namespaces</h3><ul><li><a href="Bravey.html">Bravey</a></li><li><a href="Bravey.Data.html">Data</a></li><li><a href="Bravey.Date.html">Date</a></li><li><a href="Bravey.File.html">File</a></li><li><a href="Bravey.Filter.html">Filter</a></li><li><a href="Bravey.Language.html">Language</a></li><li><a href="Bravey.Language.EN.html">EN</a></li><li><a href="Bravey.Language.IT.html">IT</a></li><li><a href="Bravey.Language.PT.html">PT</a></li><li><a href="Bravey.Nlp.html">Nlp</a></li><li><a href="Bravey.SessionManager.html">SessionManager</a></li><li><a href="Bravey.Text.html">Text</a></li></ul><h3><a href="global.html">Global</a></h3> </nav> <br class="clear"> <footer> Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.4.3</a> </footer> <script> prettyPrint(); </script> <script src="scripts/linenumber.js"> </script> </body> </html>