UNPKG

wordmap

Version:
138 lines (137 loc) 7.14 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const GlobalAlgorithm_1 = require("../GlobalAlgorithm"); /** * This algorithm calculates the frequency of n-gram occurrences. */ class LemmaNgramFrequency extends GlobalAlgorithm_1.default { constructor() { super(...arguments); this.name = "lemma n-gram frequency"; } /** * Performs a numerical addition with the value of a key in a number object. * TODO: move this into it's own class? * * @param {NumberObject} object * @param {string} key * @param {number} value */ static addObjectNumber(object, key, value) { if (!(key in object)) { object[key] = 0; } object[key] += value; } /** * Performs a numerical division. * Division by zero will result in 0. * TODO: move this into a math utility? * * @param {number} dividend * @param {number} divisor * @return {number} */ static divideSafe(dividend, divisor) { if (divisor === 0) { return 0; } else { return dividend / divisor; } } /** * Load data into the predictions * @param predictions [description] * @param cIndex [description] * @param saIndex [description] * @return [description] */ execute(predictions, cIndex, saIndex) { const alignmentFrequencyCorpusSums = {}; const alignmentFrequencyAlignmentMemorySums = {}; for (const p of predictions) { // skip predictions without lemmas if (p.alignment.lemmaKey === undefined) { p.setScores({ "sourceCorpusLemmaPermutationsFrequencyRatio": 0, "targetCorpusLemmaPermutationsFrequencyRatio": 0, "sourceAlignmentMemoryLemmaFrequencyRatio": 0, "targetAlignmentMemoryLemmaFrequencyRatio": 0 }); continue; } // alignment permutation frequency within the corpus/alignment memory const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment.lemmaKey); const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment.lemmaKey); // n-gram permutation frequency within the corpus/alignment memory // looked up by n-gram // TODO: rename to something like this. // const sourceNgramFrequencyInCorpusPermutations // first. default to default n-gram frequency let ngramFrequencyCorpusSource = cIndex.permutations.sourceNgramFrequency.read(p.source.key); let ngramFrequencyAlignmentMemorySource = saIndex.sourceNgramFrequency.read(p.source.key); let ngramFrequencyCorpusTarget = cIndex.permutations.targetNgramFrequency.read(p.target.key); let ngramFrequencyAlignmentMemoryTarget = saIndex.targetNgramFrequency.read(p.target.key); // second. use lemma n-gram frequency where available if (p.source.lemmaKey !== undefined) { ngramFrequencyCorpusSource = cIndex.permutations.sourceNgramFrequency.read(p.source.lemmaKey); ngramFrequencyAlignmentMemorySource = saIndex.sourceNgramFrequency.read(p.source.lemmaKey); } if (p.target.lemmaKey !== undefined) { ngramFrequencyCorpusTarget = cIndex.permutations.targetNgramFrequency.read(p.target.lemmaKey); ngramFrequencyAlignmentMemoryTarget = saIndex.targetNgramFrequency.read(p.target.lemmaKey); } // permutation frequency ratio const sourceCorpusLemmaPermutationsFrequencyRatio = LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusSource); const targetCorpusLemmaPermutationsFrequencyRatio = LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, ngramFrequencyCorpusTarget); const sourceAlignmentMemoryLemmaFrequencyRatio = LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemorySource); const targetAlignmentMemoryLemmaFrequencyRatio = LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, ngramFrequencyAlignmentMemoryTarget); // store scores p.setScores({ sourceCorpusLemmaPermutationsFrequencyRatio, targetCorpusLemmaPermutationsFrequencyRatio, sourceAlignmentMemoryLemmaFrequencyRatio, targetAlignmentMemoryLemmaFrequencyRatio }); // sum alignment frequencies LemmaNgramFrequency.addObjectNumber(alignmentFrequencyCorpusSums, p.key, alignmentFrequencyCorpus); LemmaNgramFrequency.addObjectNumber(alignmentFrequencyAlignmentMemorySums, p.key, alignmentFrequencyAlignmentMemory); } // calculate filtered frequency ratios for (const p of predictions) { // skip predictions without lemmas if (p.alignment.lemmaKey === undefined) { p.setScores({ // alignmentFrequencyCorpusFiltered, // alignmentFrequencyAlignmentMemoryFiltered, // TODO: we aren't using these at the moment "lemmaFrequencyRatioCorpusFiltered": 0, "lemmaFrequencyRatioAlignmentMemoryFiltered": 0 }); continue; } const alignmentFrequencyCorpus = cIndex.permutations.alignmentFrequency.read(p.alignment.lemmaKey); const alignmentFrequencyAlignmentMemory = saIndex.alignmentFrequency.read(p.alignment.lemmaKey); // TODO: instead of generating filters of alignmentFrequencyCorpus etc // we want to generate filtered ngramFrequencyCorpusSource and ngramFrequencyCorpusTarget // see notes in ngram_frequency line 160. // alignment frequency in the filtered corpus and alignment memory const alignmentFrequencyCorpusFiltered = alignmentFrequencyCorpusSums[p.key]; const alignmentFrequencyAlignmentMemoryFiltered = alignmentFrequencyAlignmentMemorySums[p.key]; // source and target frequency ratio for the corpus and alignment memory const lemmaFrequencyRatioCorpusFiltered = LemmaNgramFrequency.divideSafe(alignmentFrequencyCorpus, alignmentFrequencyCorpusFiltered); const lemmaFrequencyRatioAlignmentMemoryFiltered = LemmaNgramFrequency.divideSafe(alignmentFrequencyAlignmentMemory, alignmentFrequencyAlignmentMemoryFiltered); // store scores p.setScores({ // alignmentFrequencyCorpusFiltered, // alignmentFrequencyAlignmentMemoryFiltered, // TODO: we aren't using these at the moment lemmaFrequencyRatioCorpusFiltered, lemmaFrequencyRatioAlignmentMemoryFiltered }); } return predictions; } } exports.default = LemmaNgramFrequency;