wordmap
Version:
Multi-Lingual Word Alignment Prediction
138 lines (137 loc) • 7.14 kB
JavaScript
;
// Set the `__esModule` flag on the CommonJS exports object.
Object.defineProperty(exports, "__esModule", { value: true });
// Module whose `default` export is the base class that LemmaNgramFrequency extends.
const GlobalAlgorithm_1 = require("../GlobalAlgorithm");
/**
 * Scores predictions with lemma-based frequency ratios.
 *
 * For every prediction that has an alignment lemma key, the algorithm relates
 * the alignment's frequency to the frequency of its source/target n-gram, both
 * in the corpus permutation index and in the alignment memory index.
 * Predictions without an alignment lemma key receive 0 for every score.
 */
class LemmaNgramFrequency extends GlobalAlgorithm_1.default {
    constructor() {
        super(...arguments);
        this.name = "lemma n-gram frequency";
    }

    /**
     * Adds `value` to the number stored under `key`, starting from 0 when the
     * object does not yet have that key as an OWN property.
     *
     * An own-property check is used instead of the `in` operator: `in` also
     * matches inherited members such as "constructor" or "toString", which
     * would make the addition operate on a function instead of a number.
     * TODO: move this into its own class?
     *
     * @param {Object<string, number>} object - accumulator that is mutated in place
     * @param {string} key - the entry to increase
     * @param {number} value - the amount to add
     */
    static addObjectNumber(object, key, value) {
        if (!Object.prototype.hasOwnProperty.call(object, key)) {
            object[key] = 0;
        }
        object[key] += value;
    }

    /**
     * Performs a numerical division.
     * A divisor of exactly 0 results in 0 instead of Infinity/NaN.
     * TODO: move this into a math utility?
     *
     * @param {number} dividend
     * @param {number} divisor
     * @return {number} `dividend / divisor`, or 0 when `divisor === 0`
     */
    static divideSafe(dividend, divisor) {
        if (divisor === 0) {
            return 0;
        }
        return dividend / divisor;
    }

    /**
     * Calculates the lemma frequency scores and stores them on each prediction
     * through `setScores`.
     *
     * Scores written by the first pass:
     *  - sourceCorpusLemmaPermutationsFrequencyRatio
     *  - targetCorpusLemmaPermutationsFrequencyRatio
     *  - sourceAlignmentMemoryLemmaFrequencyRatio
     *  - targetAlignmentMemoryLemmaFrequencyRatio
     *
     * Scores written by the second pass:
     *  - lemmaFrequencyRatioCorpusFiltered
     *  - lemmaFrequencyRatioAlignmentMemoryFiltered
     *
     * @param predictions - iterable of predictions; each one is read through
     *        `alignment.lemmaKey`, `source`, `target` and `key`, and written
     *        through `setScores`
     * @param cIndex - corpus index; its `permutations.alignmentFrequency`,
     *        `permutations.sourceNgramFrequency` and
     *        `permutations.targetNgramFrequency` are queried with `read(key)`
     * @param saIndex - alignment memory index; its `alignmentFrequency`,
     *        `sourceNgramFrequency` and `targetNgramFrequency` are queried
     *        with `read(key)`
     * @return the same `predictions` that were passed in
     */
    execute(predictions, cIndex, saIndex) {
        // Prototype-less accumulators: a prediction key can never collide with
        // a member inherited from Object.prototype (e.g. "__proto__").
        const alignmentFrequencyCorpusSums = Object.create(null);
        const alignmentFrequencyAlignmentMemorySums = Object.create(null);

        // first pass: frequency ratios + per-key sums of the alignment frequencies
        for (const p of predictions) {
            // skip predictions without lemmas
            if (p.alignment.lemmaKey === undefined) {
                p.setScores({
                    "sourceCorpusLemmaPermutationsFrequencyRatio": 0,
                    "targetCorpusLemmaPermutationsFrequencyRatio": 0,
                    "sourceAlignmentMemoryLemmaFrequencyRatio": 0,
                    "targetAlignmentMemoryLemmaFrequencyRatio": 0
                });
                continue;
            }

            // alignment permutation frequency within the corpus/alignment memory
            const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment.lemmaKey);
            const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment.lemmaKey);

            // Look the n-gram frequency up by lemma key where one is available
            // and fall back to the regular n-gram key otherwise.
            // NOTE(review): assumes `read` has no side effects, so the lookup
            // by the regular key can be skipped when the lemma key is used.
            const sourceLookupKey = p.source.lemmaKey !== undefined ? p.source.lemmaKey : p.source.key;
            const targetLookupKey = p.target.lemmaKey !== undefined ? p.target.lemmaKey : p.target.key;

            // n-gram permutation frequency within the corpus/alignment memory
            // TODO: rename to something like sourceNgramFrequencyInCorpusPermutations
            const ngramFrequencyCorpusSource = cIndex.permutations.sourceNgramFrequency.read(sourceLookupKey);
            const ngramFrequencyAlignmentMemorySource = saIndex.sourceNgramFrequency.read(sourceLookupKey);
            const ngramFrequencyCorpusTarget = cIndex.permutations.targetNgramFrequency.read(targetLookupKey);
            const ngramFrequencyAlignmentMemoryTarget = saIndex.targetNgramFrequency.read(targetLookupKey);

            // permutation frequency ratio: alignment frequency / n-gram frequency
            p.setScores({
                sourceCorpusLemmaPermutationsFrequencyRatio: LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusSource),
                targetCorpusLemmaPermutationsFrequencyRatio: LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusTarget),
                sourceAlignmentMemoryLemmaFrequencyRatio: LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemorySource),
                targetAlignmentMemoryLemmaFrequencyRatio: LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemoryTarget)
            });

            // sum alignment frequencies per prediction key
            LemmaNgramFrequency.addObjectNumber(alignmentFrequencyCorpusSums, p.key, alignmentFrequencyCorpus);
            LemmaNgramFrequency.addObjectNumber(alignmentFrequencyAlignmentMemorySums, p.key, alignmentFrequencyAlignmentMemory);
        }

        // second pass: calculate filtered frequency ratios
        for (const p of predictions) {
            // skip predictions without lemmas
            if (p.alignment.lemmaKey === undefined) {
                p.setScores({
                    // TODO: alignmentFrequencyCorpusFiltered and
                    // alignmentFrequencyAlignmentMemoryFiltered are not stored
                    // because they aren't used at the moment
                    "lemmaFrequencyRatioCorpusFiltered": 0,
                    "lemmaFrequencyRatioAlignmentMemoryFiltered": 0
                });
                continue;
            }

            const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment.lemmaKey);
            const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment.lemmaKey);

            // TODO: instead of generating filters of alignmentFrequencyCorpus etc
            // we want to generate filtered ngramFrequencyCorpusSource and ngramFrequencyCorpusTarget
            // see notes in ngram_frequency line 160.

            // alignment frequency summed over the given predictions that share this key
            const alignmentFrequencyCorpusFiltered = alignmentFrequencyCorpusSums[p.key];
            const alignmentFrequencyAlignmentMemoryFiltered = alignmentFrequencyAlignmentMemorySums[p.key];

            // share of this prediction's alignment frequency in the summed frequency
            p.setScores({
                // TODO: alignmentFrequencyCorpusFiltered and
                // alignmentFrequencyAlignmentMemoryFiltered are not stored
                // because they aren't used at the moment
                lemmaFrequencyRatioCorpusFiltered: LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, alignmentFrequencyCorpusFiltered),
                lemmaFrequencyRatioAlignmentMemoryFiltered: LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, alignmentFrequencyAlignmentMemoryFiltered)
            });
        }
        return predictions;
    }
}
// Publish the class as this module's `default` export.
exports.default = LemmaNgramFrequency;