word-phoneme-map
Version:
Provides a two-way map between the words and phonemes listed in the CMU Pronouncing Dictionary.
101 lines (80 loc) • 2.6 kB
JavaScript
var queue = require('queue-async');
var level = require('level');
var basicSubleveler = require('basic-subleveler');
var phonemeTypes = require('phoneme-types');
var callNextTick = require('call-next-tick');
var createReversePhonemeIndexer = require('./reverse-phoneme-indexer');
var createForwardPhonemeIndexer = require('./forward-phoneme-indexer')
var queue = require('queue-async');
/**
 * Sets up a database-backed indexer for word/phoneme pairs.
 *
 * Opens the database at `opts.dbLocation` with JSON-encoded values, splits it
 * into a `words` sublevel (prefix 'w') and a `phonemes` sublevel (prefix 'p'),
 * then creates the reverse and forward phoneme indexers on top of it.
 *
 * @param {Object} opts
 * @param {string} opts.dbLocation - Location handed to `level`.
 * @param {Function} done - Node-style callback. Receives `(error)` if either
 *   special indexer could not be created; otherwise receives
 *   `(error, indexerMethods)` where `indexerMethods` is
 *   `{ index: function (word, cmuDictPhonemeString, done), closeDb: function }`.
 */
function createPhonemeIndexer(opts, done) {
  // Assigned in passBackMethods once the special indexers have been created.
  var indexWordByReversePhonemes;
  var indexWordByForwardPhonemes;

  var db = basicSubleveler.setUpSubleveledDB({
    db: level(opts.dbLocation, {
      valueEncoding: 'json'
    }),
    sublevels: {
      words: 'w',
      phonemes: 'p'
    }
  });

  // Both special indexers are created from the same subleveled db.
  var specialIndexerOpts = {
    db: db
  };

  var specialIndexerQueue = queue();
  specialIndexerQueue.defer(createReversePhonemeIndexer, specialIndexerOpts);
  specialIndexerQueue.defer(createForwardPhonemeIndexer, specialIndexerOpts);
  specialIndexerQueue.await(passBackMethods);

  /**
   * Receives the results of the two indexer factories (in the order they were
   * deferred) and hands the public methods back to the caller.
   */
  function passBackMethods(error, reverseIndexMethod, forwardIndexMethod) {
    if (error) {
      // NOTE(review): db stays open on this path and the caller is given no
      // handle to close it; consider closing it here once the close semantics
      // of the subleveled db are confirmed.
      done(error);
      return;
    }

    indexWordByReversePhonemes = reverseIndexMethod;
    indexWordByForwardPhonemes = forwardIndexMethod;

    done(error, {
      index: index,
      closeDb: db.close.bind(db)
    });
  }

  /**
   * Indexes one dictionary entry four ways: by word, by phoneme string, and
   * through the reverse and forward phoneme indexers.
   *
   * @param {string} word - Dictionary word; a parenthesized ordinal is removed
   *   by stripOrdinal before storing.
   * @param {string} cmuDictPhonemeString - Space-separated phonemes.
   * @param {Function} done - Passed to the queue's `awaitAll`; also called (on
   *   a later tick) with an Error when `word` or the phoneme string is missing.
   */
  function index(word, cmuDictPhonemeString, done) {
    // Validate the raw arguments before using them, so that a missing or
    // non-string argument is reported through `done` instead of being passed
    // to stripStressor (which presumably expects a string).
    if (stringIsEmpty(word)) {
      callNextTick(done, new Error('Missing word.'));
      return;
    }
    if (stringIsEmpty(cmuDictPhonemeString)) {
      callNextTick(done, new Error('Missing phonemeString.'));
      return;
    }

    var phonemes = phonemeTypes.stripStressor(cmuDictPhonemeString).split(' ');
    // Phonemes are joined with underscores to form the stored key.
    var phonemeString = phonemes.join('_');

    // Normalization could still leave nothing behind.
    if (stringIsEmpty(phonemeString)) {
      callNextTick(done, new Error('Missing phonemeString.'));
      return;
    }

    var q = queue();

    // Index by word.
    var cleanedWord = stripOrdinal(word);
    var wordLevel = db.words.sublevel(cleanedWord);
    q.defer(wordLevel.put, phonemeString, phonemes);

    // Index by phoneme string.
    var phonemeLevel = db.phonemes.sublevel(phonemeString);
    q.defer(phonemeLevel.put, cleanedWord, cleanedWord);

    // Reverse index.
    q.defer(indexWordByReversePhonemes, cleanedWord, phonemes);

    // Forward index.
    q.defer(indexWordByForwardPhonemes, cleanedWord, phonemes);

    q.awaitAll(done);
  }
}
/**
 * Reports whether `s` is unusable as a non-empty string.
 *
 * @param {*} s - Value to inspect.
 * @returns {boolean} true when `s` is not a string or has no characters;
 *   false otherwise.
 */
function stringIsEmpty(s) {
  if (typeof s === 'string') {
    return s.length < 1;
  }
  // Anything that is not a string counts as empty.
  return true;
}
// Matches a single digit wrapped in parentheses, e.g. "(1)".
var ordinalRegex = /\(\d\)/;

/**
 * Removes the first parenthesized single-digit ordinal from `word`.
 *
 * @param {string} word - Word that may carry an ordinal such as "(1)".
 * @returns {string} The word with the first match removed; unchanged when
 *   there is no match.
 */
function stripOrdinal(word) {
  var withoutOrdinal = word.replace(ordinalRegex, '');
  return withoutOrdinal;
}
// The indexer factory is this module's sole export.
module.exports = createPhonemeIndexer;