wordmap
Version:
Multi-Lingual Word Alignment Prediction
107 lines (106 loc) • 5.31 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const GlobalAlgorithm_1 = require("../GlobalAlgorithm");
/**
 * This algorithm calculates the frequency of n-gram occurrences.
 *
 * For every prediction it reads alignment and n-gram frequencies from the
 * corpus index and the alignment memory index, derives frequency ratios from
 * them and stores those ratios on the prediction via `setScores`.
 */
class NgramFrequency extends GlobalAlgorithm_1.default {
    constructor(...args) {
        super(...args);
        this.name = "n-gram frequency";
    }
    /**
     * Performs a numerical addition with the value of a key in a number object.
     * A key that is not yet an own property of the object starts at 0.
     * TODO: move this into its own class?
     *
     * Only own properties count as "already present". Members inherited from
     * the prototype chain (e.g. "constructor" or "toString" on a plain
     * object) are ignored; otherwise such a key would skip the 0
     * initialisation and the addition would turn into string concatenation.
     *
     * @param {NumberObject} object - accumulator that is mutated in place
     * @param {string} key - the entry to add to
     * @param {number} value - the amount to add
     */
    static addObjectNumber(object, key, value) {
        // `hasOwnProperty.call` also works for objects without a prototype.
        if (!Object.prototype.hasOwnProperty.call(object, key)) {
            object[key] = 0;
        }
        object[key] += value;
    }
    /**
     * Performs a numerical division.
     * Division by zero will result in 0.
     * TODO: move this into a math utility?
     *
     * @param {number} dividend
     * @param {number} divisor
     * @return {number} the quotient, or 0 when the divisor is 0
     */
    static divideSafe(dividend, divisor) {
        if (divisor === 0) {
            return 0;
        }
        return dividend / divisor;
    }
    /**
     * Load data into the predictions.
     *
     * First pass: for each prediction, read the alignment frequency and the
     * source/target n-gram frequencies from both indexes, store the four
     * alignment-to-n-gram ratios, and accumulate the alignment frequencies
     * per `p.key`.
     * Second pass: store the ratio of each prediction's alignment frequency
     * to the sum accumulated for its `p.key`.
     *
     * @param predictions - iterable of predictions; each one must expose
     *   `alignment`, `source`, `target`, `key` and a `setScores` method
     * @param cIndex - corpus index; read through `cIndex.permutations`
     * @param saIndex - alignment memory index
     * @return the same `predictions` that were passed in, with scores set
     */
    execute(predictions, cIndex, saIndex) {
        // Accumulators are keyed by `p.key`, which comes from the data being
        // aligned. Null-prototype objects make every possible key (including
        // "__proto__") behave as an ordinary entry.
        const alignmentFrequencyCorpusSums = Object.create(null);
        const alignmentFrequencyAlignmentMemorySums = Object.create(null);
        for (const p of predictions) {
            // alignment permutation frequency within the corpus/alignment memory
            const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment);
            const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment);
            // n-gram permutation frequency within the corpus/alignment memory
            // looked up by n-gram
            // TODO: rename to something like this.
            // const sourceNgramFrequencyInCorpusPermutations
            const ngramFrequencyCorpusSource = cIndex.permutations.sourceNgramFrequency.read(p.source);
            const ngramFrequencyAlignmentMemorySource = saIndex.sourceNgramFrequency.read(p.source);
            const ngramFrequencyCorpusTarget = cIndex.permutations.targetNgramFrequency.read(p.target);
            const ngramFrequencyAlignmentMemoryTarget = saIndex.targetNgramFrequency.read(p.target);
            // permutation frequency ratio
            const sourceCorpusPermutationsFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusSource);
            const targetCorpusPermutationsFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusTarget);
            const sourceAlignmentMemoryFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemorySource);
            const targetAlignmentMemoryFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemoryTarget);
            // store scores
            p.setScores({
                sourceCorpusPermutationsFrequencyRatio,
                targetCorpusPermutationsFrequencyRatio,
                sourceAlignmentMemoryFrequencyRatio,
                targetAlignmentMemoryFrequencyRatio
            });
            // sum alignment frequencies
            NgramFrequency.addObjectNumber(alignmentFrequencyCorpusSums, p.key, alignmentFrequencyCorpus);
            NgramFrequency.addObjectNumber(alignmentFrequencyAlignmentMemorySums, p.key, alignmentFrequencyAlignmentMemory);
        }
        // calculate filtered frequency ratios
        // (needs the completed sums, hence the second pass)
        for (const p of predictions) {
            const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment);
            const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment);
            // TODO: instead of generating filters of alignmentFrequencyCorpus etc
            // we want to generate filtered ngramFrequencyCorpusSource and ngramFrequencyCorpusTarget
            // see notes in ngram_frequency line 160.
            // alignment frequency in the filtered corpus and alignment memory
            const alignmentFrequencyCorpusFiltered = alignmentFrequencyCorpusSums[p.key];
            const alignmentFrequencyAlignmentMemoryFiltered = alignmentFrequencyAlignmentMemorySums[p.key];
            // source and target frequency ratio for the corpus and alignment memory
            const frequencyRatioCorpusFiltered = NgramFrequency.divideSafe(alignmentFrequencyCorpus, alignmentFrequencyCorpusFiltered);
            const frequencyRatioAlignmentMemoryFiltered = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, alignmentFrequencyAlignmentMemoryFiltered);
            // store scores
            p.setScores({
                // alignmentFrequencyCorpusFiltered,
                // alignmentFrequencyAlignmentMemoryFiltered,
                // TODO: we aren't using these at the moment
                frequencyRatioCorpusFiltered,
                frequencyRatioAlignmentMemoryFiltered
            });
        }
        return predictions;
    }
}
exports.default = NgramFrequency;