wordmap
Version:
Multi-Lingual Word Alignment Prediction
55 lines (54 loc) • 1.93 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const Parser_1 = require("../Parser");
const PermutationIndex_1 = require("./PermutationIndex");
const StaticIndex_1 = require("./StaticIndex");
/**
 * Bundles the two indexes that are built from a corpus:
 * a permutation index and a static index.
 */
class CorpusIndex {
    /**
     * Creates a corpus index backed by a fresh, empty permutation index
     * and a fresh, empty static index.
     */
    constructor() {
        this.permutationIndex = new PermutationIndex_1.default();
        this.staticIndex = new StaticIndex_1.default();
    }

    /**
     * The index of permutation metrics.
     */
    get permutations() {
        return this.permutationIndex;
    }

    /**
     * The index of static metrics.
     */
    get static() {
        return this.staticIndex;
    }

    /**
     * Adds parallel sentences to both indexes.
     * `source[i]` is paired with `target[i]`, so both corpora must contain
     * the same number of sentences.
     * The tokens must contain positional metrics for better accuracy.
     *
     * @param {Token[][]} source - tokenized source sentences
     * @param {Token[][]} target - tokenized target sentences
     * @param maxSourceNgramLength - forwarded to `Parser.ngrams` for each source sentence
     * @param maxTargetNgramLength - forwarded to `Parser.ngrams` for each target sentence
     * @throws {Error} if `source` and `target` differ in length
     */
    append(source, target, maxSourceNgramLength, maxTargetNgramLength) {
        const sentenceCount = source.length;

        // Reject mismatched corpora up front so nothing is indexed partially.
        if (sentenceCount !== target.length) {
            throw new Error("source and target corpus must be the same length");
        }

        for (let index = 0; index < sentenceCount; index += 1) {
            const sourceSentence = source[index];
            const targetSentence = target[index];
            const sourceNgrams = Parser_1.default.ngrams(sourceSentence, maxSourceNgramLength);
            const targetNgrams = Parser_1.default.ngrams(targetSentence, maxTargetNgramLength);

            // Static metrics receive the raw sentences together with their n-grams.
            this.staticIndex.addSentence(sourceSentence, targetSentence, sourceNgrams, targetNgrams);

            // Permutation metrics are derived from the n-grams alone.
            Parser_1.default.indexAlignmentPermutations(sourceNgrams, targetNgrams, this.permutationIndex);
        }
    }
}
exports.default = CorpusIndex;