UNPKG

@voiceflow/common

Version:

Junk drawer of utility functions

github.com/voiceflow/libs

151 lines (150 loc) • 6.99 kB

JavaScript

"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.cleanVFIntentName = exports.injectUtteranceSpaces = exports.utteranceEntityPermutations = exports.VF_ENTITY_REGEXP = exports.getSlotType = exports.getUtterancesWithSlotNames = exports.formatIntentName = void 0; const sample_js_1 = __importDefault(require("lodash/sample.js")); const constants_1 = require("../constants"); const slot_1 = require("./slot"); const formatIntentName = (name) => { if (!name) { return name; } let formattedName = ''; // replace white spaces with underscores formattedName = name.replace(constants_1.SPACE_REGEXP, '_'); // replace numbers with equivalent capital letter. Ex: 0 = A, 1 = B formattedName = formattedName.replace(/\d/g, (digit) => String.fromCharCode(parseInt(digit, 10) + 65)); return formattedName; }; exports.formatIntentName = formatIntentName; const getUtterancesWithSlotNames = ({ slots = [], utterances = [], }) => utterances .filter((utterance) => !!utterance.text?.trim()) .map(({ text }) => text.replace(constants_1.SLOT_REGEXP, (substring, _name, key) => { const slot = slots.find((_slot) => _slot.key === key); return slot?.name ? `{${slot.name}}` : substring; })); exports.getUtterancesWithSlotNames = getUtterancesWithSlotNames; const getSlotType = (slots, slot) => { let type = slot.name; const lowerCaseType = slot.type.value?.toLowerCase() ?? ''; if (!!slot.type.value && lowerCaseType !== constants_1.LOWER_CASE_CUSTOM_SLOT_TYPE) { const builtinSlot = slots.find((_slot) => _slot.label.toLowerCase() === lowerCaseType); if (!builtinSlot) { type = slot.type.value; // Platform specific slot } else { ({ type } = builtinSlot); } } return type; }; exports.getSlotType = getSlotType; exports.VF_ENTITY_REGEXP = /{{\[(\w{1,32})]\.(\w{1,32})}}/gi; // extension of the String.prototype.replace format const continuousReplace = (text, regex, replacer) => { // regex without any global flags (g or i) const localRegex = new RegExp(regex, ''); let temp; let current = text; // keep replacing until there is nothing else to replace (local replaces one instance at a time, this is important to keep positional offset data) while (temp !== current) { temp = current; current = current.replace(localRegex, replacer); } return current; }; const utteranceEntityPermutations = ({ utterances, entitiesByID, limit = 22, replacer, getSamples = slot_1.getAllSamples, }) => { const newUtterances = []; const entityRef = {}; const addNewUtterance = (utterance) => { if (!utterance?.trim()) return; const entities = []; // Find all occurences of {entityName} in training utterances const text = continuousReplace(utterance, exports.VF_ENTITY_REGEXP, (_match, entityName, entityID, offset) => { const entity = entitiesByID[entityID]; if (!entity) return entityName; const sample = (entityRef[entityID]?.samples.shift() || (0, sample_js_1.default)(getSamples(entity?.inputs)) || entityName).trim(); if (!entityRef[entityID]?.samples?.length) delete entityRef[entityID]; const replacement = replacer?.(sample, entityID) ?? sample; // This module should additionally create one full training utterance with positional entity (startPos, endPos, entityName). const startPos = offset || 0; const endPos = startPos + replacement.length - 1; entities.push({ startPos, endPos, entity: entity.name, key: entityID, }); // Replace the entities with the replacement value return replacement; }); newUtterances.push({ text, entities, }); }; // find all the entities referenced by this intent // first pass over all utterances guarantees every utterance used utterances.forEach((utterance) => { // find all the entities used in this utterance const entityMatches = [...utterance.matchAll(exports.VF_ENTITY_REGEXP)]; entityMatches.forEach((match) => { const entityID = match[2]; // if this entity hasn't been visited before, initialize the ref and populate samples with all synonyms of the entity if (!entityRef[entityID]) { entityRef[entityID] = { samples: [], utterances: [] }; const entity = entitiesByID[entityID]; if (entity) { entityRef[entityID].samples.push(...getSamples(entity.inputs)); } } entityRef[entityID].utterances.push(utterance); }); addNewUtterance(utterance); }); while (Object.keys(entityRef).length > 0 && newUtterances.length < limit) { const entityID = Object.keys(entityRef)[0]; const utterancesUsingEntity = entityRef[entityID]?.utterances; if (utterancesUsingEntity?.length) { addNewUtterance(utterancesUsingEntity[newUtterances.length % utterancesUsingEntity.length]); } else { delete entityRef[entityID]; } } return newUtterances; }; exports.utteranceEntityPermutations = utteranceEntityPermutations; const ALPHANUMERIC_REGEXP = /[\dA-Za-z{}]/; // some NLP/NLU models do not allow entity classifications without a space seperator: 'I work at {startupName}flow' => 'I work at {startupName} flow' const injectUtteranceSpaces = (originalUtterance) => { let spacesAdded = 0; let utterance = originalUtterance ? originalUtterance.trim() : ''; const slots = [...utterance.matchAll(exports.VF_ENTITY_REGEXP)]; slots.forEach((slot) => { let index = slot.index + spacesAdded; // Check if space should be added before slot if (index > 0 && utterance[index - 1].match(ALPHANUMERIC_REGEXP)) { utterance = `${utterance.slice(0, index)} ${utterance.slice(index)}`; ++spacesAdded; ++index; } // Check if space should be added after slot if (index + slot[0].length < utterance.length - 1 && utterance[index + slot[0].length].match(ALPHANUMERIC_REGEXP)) { utterance = `${utterance.slice(0, index + slot[0].length)} ${utterance.slice(index + slot[0].length)}`; ++spacesAdded; } }); return utterance; }; exports.injectUtteranceSpaces = injectUtteranceSpaces; // VF.HELP -> help const cleanVFIntentName = (intentName) => intentName.startsWith('VF.') ? intentName.slice(3).toLowerCase() : intentName; exports.cleanVFIntentName = cleanVFIntentName;