wordmap
Version:
Multi-Lingual Word Alignment Prediction
64 lines (63 loc) • 2.46 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const Algorithm_1 = require("../Algorithm");
/**
 * Scores a prediction by how closely the number of occurrences of its source
 * n-gram matches the number of occurrences of its target n-gram.
 *
 * Rationale: word repetition in the primary text is commonly mirrored by
 * repetition in the secondary text.
 */
class AlignmentOccurrences extends Algorithm_1.default {
    constructor(...args) {
        // Hand every constructor argument through to the base class untouched.
        super(...args);
        this.name = "alignment occurrences";
    }

    /**
     * Computes the similarity of two frequencies as the ratio of the smaller
     * one to the larger one.
     *
     * For two positive frequencies the result lies in (0, 1], and equals 1
     * only when both are the same.
     *
     * TODO: this ratio replaced an earlier delta-based formula,
     * `1 / (Math.abs(source - target) + 1)`; the change is still waiting to
     * be reviewed with Klappy.
     *
     * @param sourceNgramFrequency frequency of the source n-gram
     * @param targetNgramFrequency frequency of the target n-gram
     * @returns 0 if either frequency is exactly 0, otherwise min / max
     */
    static calc(sourceNgramFrequency, targetNgramFrequency) {
        const bothNonZero = sourceNgramFrequency !== 0 && targetNgramFrequency !== 0;
        if (!bothNonZero) {
            // Also keeps a 0 / 0 division from ever happening.
            return 0;
        }
        const smaller = Math.min(sourceNgramFrequency, targetNgramFrequency);
        const larger = Math.max(sourceNgramFrequency, targetNgramFrequency);
        return smaller / larger;
    }

    /**
     * Scores the prediction using the frequencies of its source and target
     * n-grams as read from the static part of the unaligned sentence index.
     * The result is stored on the prediction as "alignmentOccurrences".
     *
     * @param p the prediction to score; it is updated in place
     * @param usIndex index exposing `static.sourceNgramFrequency` and
     *        `static.targetNgramFrequency`
     */
    static calcOccurrenceSimilarity(p, usIndex) {
        const staticIndex = usIndex.static;
        const sourceCount = staticIndex.sourceNgramFrequency.read(p.source);
        const targetCount = staticIndex.targetNgramFrequency.read(p.target);
        p.setScore("alignmentOccurrences", AlignmentOccurrences.calc(sourceCount, targetCount));
    }

    /**
     * Same as the word-based score, but the frequencies are looked up by the
     * lemma keys of the source and target. The result is stored on the
     * prediction as "lemmaAlignmentOccurrences"; it is 0 whenever either
     * side has no lemma key.
     *
     * @param p the prediction to score; it is updated in place
     * @param usIndex index exposing `static.sourceNgramFrequency` and
     *        `static.targetNgramFrequency`
     */
    static calcLemmaOccurrenceSimilarity(p, usIndex) {
        // Default used when a lemma key is missing on either side.
        let lemmaWeight = 0;
        const lemmaMissing = p.source.lemmaKey === undefined || p.target.lemmaKey === undefined;
        if (!lemmaMissing) {
            const staticIndex = usIndex.static;
            const sourceCount = staticIndex.sourceNgramFrequency.read(p.source.lemmaKey);
            const targetCount = staticIndex.targetNgramFrequency.read(p.target.lemmaKey);
            lemmaWeight = AlignmentOccurrences.calc(sourceCount, targetCount);
        }
        p.setScore("lemmaAlignmentOccurrences", lemmaWeight);
    }

    /**
     * Runs both occurrence scores (word-based first, then lemma-based) on the
     * prediction.
     *
     * @param prediction the prediction to score
     * @param cIndex not used by this algorithm
     * @param saIndex not used by this algorithm
     * @param usIndex index providing the n-gram frequencies
     * @returns the same prediction instance, now carrying both scores
     */
    execute(prediction, cIndex, saIndex, usIndex) {
        const scorers = [
            AlignmentOccurrences.calcOccurrenceSimilarity,
            AlignmentOccurrences.calcLemmaOccurrenceSimilarity,
        ];
        for (const applyScore of scorers) {
            // Neither scorer reads `this`, so calling them detached is safe.
            applyScore(prediction, usIndex);
        }
        return prediction;
    }
}
// Expose the class as this module's default export.
exports.default = AlignmentOccurrences;