@voiceflow/common
Version:
Junk drawer of utility functions
151 lines (150 loc) • 6.99 kB
JavaScript
;
// Interop helper for default imports: reuses an `__importDefault` already present on `this`,
// otherwise returns ES-module-flagged values as-is and wraps anything else as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.cleanVFIntentName = exports.injectUtteranceSpaces = exports.utteranceEntityPermutations = exports.VF_ENTITY_REGEXP = exports.getSlotType = exports.getUtterancesWithSlotNames = exports.formatIntentName = void 0;
const sample_js_1 = __importDefault(require("lodash/sample.js"));
const constants_1 = require("../constants");
const slot_1 = require("./slot");
/**
 * Normalises an intent name: every `SPACE_REGEXP` match becomes an underscore and every
 * digit becomes the capital letter at that position in the alphabet (0 -> A, 1 -> B, ... 9 -> J).
 *
 * @param {string} name - Intent name to format.
 * @returns {string} The formatted name; falsy input (empty string, null, undefined) is returned untouched.
 */
const formatIntentName = (name) => {
    if (!name) {
        return name;
    }
    // Shift the digit's numeric value onto the char code of 'A'.
    const digitToLetter = (digit) => String.fromCharCode('A'.charCodeAt(0) + Number(digit));
    return name.replace(constants_1.SPACE_REGEXP, '_').replace(/\d/g, digitToLetter);
};
exports.formatIntentName = formatIntentName;
/**
 * Returns the text of every non-blank utterance with slot placeholders rewritten as `{slotName}`.
 *
 * A placeholder is whatever `SLOT_REGEXP` matches; its second capture group is compared
 * against each slot's `key`. Placeholders without a matching, named slot are left verbatim.
 *
 * @param {object} options
 * @param {Array<{key: string, name: string}>} [options.slots=[]] - Slots available for lookup.
 * @param {Array<{text?: string}>} [options.utterances=[]] - Utterances to render.
 * @returns {string[]} Rendered utterance texts; utterances whose text is missing or only whitespace are dropped.
 */
const getUtterancesWithSlotNames = ({ slots = [], utterances = [], }) => {
    const hasText = (utterance) => Boolean(utterance.text?.trim());
    // Swap one placeholder for `{name}` when a slot with that key carries a name.
    const renderPlaceholder = (placeholder, _name, key) => {
        const matchingSlot = slots.find((candidate) => candidate.key === key);
        if (matchingSlot?.name) {
            return `{${matchingSlot.name}}`;
        }
        return placeholder;
    };
    const withSlotNames = ({ text }) => text.replace(constants_1.SLOT_REGEXP, renderPlaceholder);
    return utterances.filter(hasText).map(withSlotNames);
};
exports.getUtterancesWithSlotNames = getUtterancesWithSlotNames;
/**
 * Resolves the type identifier to use for a slot.
 *
 * - No type value, or the custom slot type (compared case-insensitively): the slot's own name.
 * - Type value equal (case-insensitively) to the `label` of an entry in `slots`: that entry's `type`.
 * - Anything else: the raw type value, treated as a platform-specific slot type.
 *
 * @param {Array<{label: string, type: string}>} slots - Built-in slot definitions to search.
 * @param {{name: string, type: {value?: string}}} slot - Slot whose type is being resolved.
 * @returns {string} The resolved type.
 */
const getSlotType = (slots, slot) => {
    const rawType = slot.type.value;
    const normalizedType = rawType?.toLowerCase() ?? '';
    if (!rawType || normalizedType === constants_1.LOWER_CASE_CUSTOM_SLOT_TYPE) {
        return slot.name;
    }
    const builtinSlot = slots.find((candidate) => candidate.label.toLowerCase() === normalizedType);
    // Unknown labels are passed through unchanged as platform-specific types.
    return builtinSlot ? builtinSlot.type : rawType;
};
exports.getSlotType = getSlotType;
// Matches entity references written as `{{[entityName].entityID}}`.
// Capture group 1 is the entity name and group 2 the entity ID; each is 1-32 `\w` characters.
// NOTE: the pattern carries the `g` flag, so this shared instance keeps `lastIndex` state when
// used with `test`/`exec`; within this file it is only passed to `matchAll` and `continuousReplace`.
exports.VF_ENTITY_REGEXP = /{{\[(\w{1,32})]\.(\w{1,32})}}/gi;
/**
 * Extension of the `String.prototype.replace` format: replaces every match of `regex`
 * in `text`, left to right, and reports to the replacer where each replacement lands
 * in the OUTPUT string (needed to keep positional offset data for entities).
 *
 * The text is scanned once. Replacements are never re-scanned, so a replacement that
 * itself looks like a match cannot cause an endless loop, and a replacer that returns
 * the matched text unchanged does not stop later matches from being processed.
 *
 * @param {string} text - Input text.
 * @param {RegExp|string} regex - Pattern to replace. All flags except `g`/`y` are honoured
 *   (every match is always replaced, regardless of the `g` flag on the input).
 * @param {Function|string} replacer - Called as `(match, ...captures, offset, string[, groups])`
 *   where `offset` is the index of the match in the output built so far and `string` is the
 *   partially replaced text. A string is applied as a plain global replacement.
 * @returns {string} The text with all matches replaced.
 */
const continuousReplace = (text, regex, replacer) => {
    const isPatternString = typeof regex === 'string';
    const source = isPatternString ? regex : regex.source;
    // Keep flags that change what matches (i, m, s, u); force a fresh global, non-sticky matcher
    // so the caller's regex instance (and its lastIndex) is never touched.
    const inheritedFlags = isPatternString ? '' : regex.flags.replace(/[gy]/g, '');
    const matcher = new RegExp(source, `${inheritedFlags}g`);
    if (typeof replacer !== 'function') {
        return text.replace(matcher, replacer);
    }
    let output = '';
    let cursor = 0;
    for (const match of text.matchAll(matcher)) {
        output += text.slice(cursor, match.index);
        // Mirror the native replacer signature, but with offsets relative to the output.
        const args = [...match, output.length, output + text.slice(match.index)];
        if (match.groups !== undefined) {
            args.push(match.groups);
        }
        output += String(replacer(...args));
        cursor = match.index + match[0].length;
    }
    return output + text.slice(cursor);
};
/**
 * Expands training utterances containing `{{[entityName].entityID}}` references into
 * concrete utterances in which each reference is replaced by an entity sample, recording
 * the position of every replacement.
 *
 * First pass: every non-blank utterance is emitted once (regardless of `limit`) while the
 * samples of each referenced, known entity are queued. Second pass: utterances are re-emitted
 * with the still-unused samples until all samples are consumed or `limit` utterances exist.
 * References to entities that are not own properties of `entitiesByID` are replaced by the
 * entity name and produce no additional permutations.
 *
 * @param {object} options
 * @param {string[]} [options.utterances=[]] - Raw utterances; null/blank entries are skipped.
 * @param {Object<string, {name: string, inputs: *}>} [options.entitiesByID={}] - Entities keyed by ID.
 * @param {number} [options.limit=22] - Upper bound on generated utterances for the second pass.
 * @param {(sample: string, entityID: string) => string} [options.replacer] - Optional transform of the
 *   sample before insertion; its result is what is inserted and measured.
 * @param {(inputs: *) => Iterable<string>} [options.getSamples] - Extracts all samples from an entity's inputs.
 * @returns {Array<{text: string, entities: Array<{startPos: number, endPos: number, entity: string, key: string}>}>}
 *   Generated utterances; `startPos`/`endPos` are inclusive indices into `text`.
 */
const utteranceEntityPermutations = ({ utterances = [], entitiesByID = {}, limit = 22, replacer, getSamples = slot_1.getAllSamples, }) => {
    const newUtterances = [];
    // Null-prototype dictionary: entity IDs such as "constructor" or "__proto__" stay ordinary keys.
    const entityRef = Object.create(null);
    // Only own properties count as entities, so inherited Object.prototype members are never used.
    const findEntity = (entityID) => Object.prototype.hasOwnProperty.call(entitiesByID, entityID) ? entitiesByID[entityID] : undefined;
    const addNewUtterance = (utterance) => {
        if (!utterance?.trim())
            return;
        const entities = [];
        // Find all occurrences of {{[entityName].entityID}} in the training utterance
        const text = continuousReplace(utterance, exports.VF_ENTITY_REGEXP, (_match, entityName, entityID, offset) => {
            const entity = findEntity(entityID);
            if (!entity)
                return entityName;
            // Prefer an unused queued sample, then a random sample, then the entity name itself.
            const sample = (entityRef[entityID]?.samples.shift() ||
                (0, sample_js_1.default)(getSamples(entity.inputs)) ||
                entityName).trim();
            // Once every sample has been used the entity needs no further permutations.
            if (!entityRef[entityID]?.samples?.length)
                delete entityRef[entityID];
            const replacement = replacer?.(sample, entityID) ?? sample;
            // Positional entity data (startPos, endPos, entityName) for the generated utterance.
            const startPos = offset || 0;
            const endPos = startPos + replacement.length - 1;
            entities.push({
                startPos,
                endPos,
                entity: entity.name,
                key: entityID,
            });
            return replacement;
        });
        newUtterances.push({
            text,
            entities,
        });
    };
    // First pass over all utterances guarantees every utterance is used at least once.
    utterances.forEach((utterance) => {
        if (!utterance?.trim())
            return;
        for (const match of utterance.matchAll(exports.VF_ENTITY_REGEXP)) {
            const entityID = match[2];
            const entity = findEntity(entityID);
            // Unknown entities have no samples to permute; tracking them would only make the
            // second pass re-emit the same utterance and starve the entities queued after them.
            if (!entity)
                continue;
            // First visit: queue every sample (synonym) of the entity.
            if (!entityRef[entityID]) {
                entityRef[entityID] = { samples: [...getSamples(entity.inputs)], utterances: [] };
            }
            entityRef[entityID].utterances.push(utterance);
        }
        addNewUtterance(utterance);
    });
    // Second pass: keep emitting utterances until every queued sample is used or the limit is hit.
    while (newUtterances.length < limit) {
        const [entityID] = Object.keys(entityRef);
        if (entityID === undefined)
            break;
        const utterancesUsingEntity = entityRef[entityID].utterances;
        if (utterancesUsingEntity.length) {
            // Rotate through the utterances that reference this entity.
            addNewUtterance(utterancesUsingEntity[newUtterances.length % utterancesUsingEntity.length]);
        }
        else {
            delete entityRef[entityID];
        }
    }
    return newUtterances;
};
exports.utteranceEntityPermutations = utteranceEntityPermutations;
// Characters that must not touch an entity reference: ASCII letters, digits and curly braces
// (braces are included so that two adjacent references are separated as well).
const ALPHANUMERIC_REGEXP = /[\dA-Za-z{}]/;
/**
 * Inserts a space on either side of each `{{[entityName].entityID}}` reference that directly
 * touches a letter, digit or brace. Some NLP/NLU models do not allow entity classifications
 * without a space separator: 'I work at {startupName}flow' => 'I work at {startupName} flow'.
 *
 * @param {string} [originalUtterance] - Utterance to fix; falsy input yields ''.
 * @returns {string} The trimmed utterance with separators injected.
 */
const injectUtteranceSpaces = (originalUtterance) => {
    let utterance = originalUtterance ? originalUtterance.trim() : '';
    // Match indices refer to the untouched utterance; this tracks how far inserted spaces shifted them.
    let spacesAdded = 0;
    const slots = [...utterance.matchAll(exports.VF_ENTITY_REGEXP)];
    slots.forEach((slot) => {
        let start = slot.index + spacesAdded;
        // Space before the slot
        if (start > 0 && ALPHANUMERIC_REGEXP.test(utterance[start - 1])) {
            utterance = `${utterance.slice(0, start)} ${utterance.slice(start)}`;
            ++spacesAdded;
            ++start;
        }
        // Space after the slot; `end` may point at the very last character of the utterance.
        const end = start + slot[0].length;
        if (end < utterance.length && ALPHANUMERIC_REGEXP.test(utterance[end])) {
            utterance = `${utterance.slice(0, end)} ${utterance.slice(end)}`;
            ++spacesAdded;
        }
    });
    return utterance;
};
exports.injectUtteranceSpaces = injectUtteranceSpaces;
/**
 * Strips the `VF.` prefix from built-in intent names and lower-cases the remainder
 * (VF.HELP -> help). Names without that exact prefix are returned unchanged.
 *
 * @param {string} intentName - Intent name to clean.
 * @returns {string} The cleaned intent name.
 */
const cleanVFIntentName = (intentName) => {
    if (!intentName.startsWith('VF.')) {
        return intentName;
    }
    return intentName.slice('VF.'.length).toLowerCase();
};
exports.cleanVFIntentName = cleanVFIntentName;