UNPKG

wordmap

Version:
39 lines (38 loc) 1.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const Algorithm_1 = require("../Algorithm"); /** * In other algorithms n-grams are scored equally regardless of length. * However, shorter n-grams are more prevalent than longer n-grams. * This typically results in shorter n-grams overwhelming the output. * * The weight of an alignment increases proportionally to it's length, * and relative sentence coverage in primary and secondary text. */ class NgramLength extends Algorithm_1.default { constructor() { super(...arguments); this.name = "n-gram length"; } execute(prediction) { let weight = 0; // TRICKY: do not score null alignments if (!prediction.target.isNull()) { // sentence lengths const sourceSentenceLength = prediction.source.sentenceTokenLength; const targetSentenceLength = prediction.target.sentenceTokenLength; // n-gram lengths const sourceLength = prediction.source.tokenLength; const targetLength = prediction.target.tokenLength; const primaryLengthRatio = sourceLength / sourceSentenceLength; const secondaryLengthRatio = targetLength / targetSentenceLength; // length affinity const delta = Math.abs(primaryLengthRatio - secondaryLengthRatio); // TRICKY: the power of 5 improves the curve weight = Math.pow(1 - delta, 5); } prediction.setScore("ngramLength", weight); return prediction; } } exports.default = NgramLength;