UNPKG

wordmap

Version:
55 lines (54 loc) 1.93 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const parserModule = require("../Parser");
const permutationIndexModule = require("./PermutationIndex");
const staticIndexModule = require("./StaticIndex");

/**
 * Bundles the indexes that are built from the corpus:
 * one for permutation metrics and one for static metrics.
 */
class CorpusIndex {
    constructor() {
        this.permutationIndex = new permutationIndexModule.default();
        this.staticIndex = new staticIndexModule.default();
    }

    /**
     * The index holding permutation metrics.
     */
    get permutations() {
        return this.permutationIndex;
    }

    /**
     * The index holding static metrics.
     */
    get static() {
        return this.staticIndex;
    }

    /**
     * Adds a batch of aligned sentences to both indexes.
     * Tokens should carry positional metrics for better accuracy.
     *
     * The sentence at position `n` in `source` is paired with the sentence
     * at position `n` in `target`.
     *
     * @param {Token[][]} source - source sentences, each a list of tokens
     * @param {Token[][]} target - target sentences, each a list of tokens
     * @param maxSourceNgramLength - passed to the parser when building source n-grams
     * @param maxTargetNgramLength - passed to the parser when building target n-grams
     * @throws {Error} if `source` and `target` contain a different number of sentences
     */
    append(source, target, maxSourceNgramLength, maxTargetNgramLength) {
        const sentenceCount = source.length;

        // Guard: the two corpora are consumed pairwise, so they must line up.
        if (sentenceCount !== target.length) {
            throw new Error("source and target corpus must be the same length");
        }

        for (let position = 0; position < sentenceCount; position += 1) {
            // Resolve the parser lazily, at use time, as the module export.
            const parser = parserModule.default;
            const sourceSentence = source[position];
            const targetSentence = target[position];

            const sourceNgrams = parser.ngrams(sourceSentence, maxSourceNgramLength);
            const targetNgrams = parser.ngrams(targetSentence, maxTargetNgramLength);

            // Static metrics are indexed first ...
            this.staticIndex.addSentence(
                sourceSentence,
                targetSentence,
                sourceNgrams,
                targetNgrams
            );

            // ... followed by the permutation metrics for the same sentence pair.
            parser.indexAlignmentPermutations(
                sourceNgrams,
                targetNgrams,
                this.permutationIndex
            );
        }
    }
}

exports.default = CorpusIndex;