intent-tools
Version:
Tools for converting Rasa.ai (Rasa NLU) JSON files into text learning sets for Facebook fastText
75 lines (60 loc) • 1.87 kB
JavaScript
/*
* @author David Menger
*/
;
const fs = require('fs');
const { StringDecoder } = require('string_decoder');
const JSONStream = require('JSONStream');
const eventStream = require('event-stream');
const filters = require('./filters');
const Pipeline = require('./Pipeline');
/**
 * Create fast-text learning set from Rasa intents json
 *
 * Every item found under `rasa_nlu_data.common_examples` is written to the
 * output as one line in the form `__label__<intent> <text>`, where the text
 * has first been passed through the normalizer function.
 *
 * @param {string|ReadableStream} input - path of Rasa intent set or stream
 * @param {string|Writable} output - path or stream to write fast-text learning set
 * @param {Array} [pipeline] - array of transform streams to modify the learning set
 *     (they receive the parsed example objects, before the text lines are built)
 * @param {Function} [mapFn] - text normalizer function, defaults to `filters.normalize`
 * @returns {Promise} the value of `Pipeline#promise()`; presumably settles when the
 *     whole pipeline finishes or fails - confirm in ./Pipeline
 *
 * @example
 * const path = require('path');
 * const { jsonToText } = require('intent-tools');
 *
 * const from = path.resolve(process.cwd(), 'sample.json');
 * const to = path.resolve(process.cwd(), 'trainingData.txt');
 *
 * jsonToText(from, to)
 *     .catch(e => console.error(e));
 */
function jsonToText (input, output, pipeline = [], mapFn = null) {
    const map = mapFn || filters.normalize;

    // a string argument is treated as a file path, anything else is used as a stream as is
    const inp = typeof input === 'string'
        ? fs.createReadStream(input, { encoding: 'utf8' })
        : input;

    const out = typeof output === 'string'
        ? fs.createWriteStream(output)
        : output;

    // Caller supplied streams may emit Buffer chunks which end in the middle of
    // a multi-byte UTF-8 character. The decoder keeps the incomplete bytes back
    // until the next chunk arrives, so no character is turned into U+FFFD.
    const decoder = new StringDecoder('utf8');

    const pipes = new Pipeline();

    pipes.add(inp);

    pipes.add(eventStream.mapSync((data) => {
        const chunk = Buffer.isBuffer(data)
            ? decoder.write(data)
            : data.toString();

        // NOTE(review): the replacement runs on every chunk, not once on the whole
        // document - it turns one leading whitespace character and any trailing
        // whitespace run of the chunk into a single space. Kept exactly as it was;
        // confirm the intent (looks like BOM / padding clean-up before parsing).
        return chunk.replace(/^\s|\s+$/g, ' ');
    }));

    // emits every element of the `rasa_nlu_data.common_examples` array as an object
    pipes.add(JSONStream.parse('rasa_nlu_data.common_examples.*'));

    // the default value is an empty array, so this is skipped only for explicit null, etc.
    if (pipeline) {
        pipes.add(pipeline);
    }

    // one fast-text line per example: label first, normalized text after it
    pipes.add(eventStream.mapSync((data) => {
        const tag = `__label__${data.intent}`;
        const mapped = map(data.text);

        return `${tag} ${mapped}\n`;
    }));

    pipes.add(out);

    return pipes.promise();
}
// CommonJS export: requiring this file yields the jsonToText function itself
module.exports = jsonToText;