UNPKG

wordmap

Version:
107 lines (106 loc) 5.31 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const GlobalAlgorithm_1 = require("../GlobalAlgorithm"); /** * This algorithm calculates the frequency of n-gram occurrences. */ class NgramFrequency extends GlobalAlgorithm_1.default { constructor() { super(...arguments); this.name = "n-gram frequency"; } /** * Performs a numerical addition with the value of a key in a number object. * TODO: move this into it's own class? * * @param {NumberObject} object * @param {string} key * @param {number} value */ static addObjectNumber(object, key, value) { if (!(key in object)) { object[key] = 0; } object[key] += value; } /** * Performs a numerical division. * Division by zero will result in 0. * TODO: move this into a math utility? * * @param {number} dividend * @param {number} divisor * @return {number} */ static divideSafe(dividend, divisor) { if (divisor === 0) { return 0; } else { return dividend / divisor; } } /** * Load data into the predictions * @param predictions [description] * @param cIndex [description] * @param saIndex [description] * @return [description] */ execute(predictions, cIndex, saIndex) { const alignmentFrequencyCorpusSums = {}; const alignmentFrequencyAlignmentMemorySums = {}; for (const p of predictions) { // alignment permutation frequency within the corpus/alignment memory const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment); const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment); // n-gram permutation frequency within the corpus/alignment memory // looked up by n-gram // TODO: rename to something like this. // const sourceNgramFrequencyInCorpusPermutations const ngramFrequencyCorpusSource = cIndex.permutations.sourceNgramFrequency.read(p.source); const ngramFrequencyAlignmentMemorySource = saIndex.sourceNgramFrequency.read(p.source); const ngramFrequencyCorpusTarget = cIndex.permutations.targetNgramFrequency.read(p.target); const ngramFrequencyAlignmentMemoryTarget = saIndex.targetNgramFrequency.read(p.target); // permutation frequency ratio const sourceCorpusPermutationsFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusSource); const targetCorpusPermutationsFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusTarget); const sourceAlignmentMemoryFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemorySource); const targetAlignmentMemoryFrequencyRatio = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemoryTarget); // store scores p.setScores({ sourceCorpusPermutationsFrequencyRatio, targetCorpusPermutationsFrequencyRatio, sourceAlignmentMemoryFrequencyRatio, targetAlignmentMemoryFrequencyRatio }); // sum alignment frequencies NgramFrequency.addObjectNumber(alignmentFrequencyCorpusSums, p.key, alignmentFrequencyCorpus); NgramFrequency.addObjectNumber(alignmentFrequencyAlignmentMemorySums, p.key, alignmentFrequencyAlignmentMemory); } // calculate filtered frequency ratios for (const p of predictions) { const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment); const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment); // TODO: instead of generating filters of alignmentFrequencyCorpus etc // we want to generate filtered ngramFrequencyCorpusSource and ngramFrequencyCorpusTarget // see notes in ngram_frequency line 160. // alignment frequency in the filtered corpus and alignment memory const alignmentFrequencyCorpusFiltered = alignmentFrequencyCorpusSums[p.key]; const alignmentFrequencyAlignmentMemoryFiltered = alignmentFrequencyAlignmentMemorySums[p.key]; // source and target frequency ratio for the corpus and alignment memory const frequencyRatioCorpusFiltered = NgramFrequency.divideSafe(alignmentFrequencyCorpus, alignmentFrequencyCorpusFiltered); const frequencyRatioAlignmentMemoryFiltered = NgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, alignmentFrequencyAlignmentMemoryFiltered); // store scores p.setScores({ // alignmentFrequencyCorpusFiltered, // alignmentFrequencyAlignmentMemoryFiltered, // TODO: we aren't using these at the moment frequencyRatioCorpusFiltered, frequencyRatioAlignmentMemoryFiltered }); } return predictions; } } exports.default = NgramFrequency;