UNPKG

wordmap

Version:
134 lines (133 loc) 5.88 kB
import { Token } from "wordmap-lexer";
import Algorithm from "./Algorithm";
import AlgorithmInterface from "./AlgorithmInterface";
import GlobalAlgorithm from "./GlobalAlgorithm";
import AlignmentMemoryIndex from "./index/AlignmentMemoryIndex";
import CorpusIndex from "./index/CorpusIndex";
import NumberObject from "./index/NumberObject";
import Alignment from "./structures/Alignment";
import Ngram from "./structures/Ngram";
import Prediction from "./structures/Prediction";
import Suggestion from "./structures/Suggestion";
/**
 * Represents a multi-lingual word alignment prediction engine.
 */
export default class Engine {
    /**
     * Generates an array of all possible alignment predictions
     * @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
     * @param {Ngram[]} targetNgrams - every possible n-gram in the target text
     * @return {Prediction[]}
     */
    static generatePredictions(sourceNgrams: Ngram[], targetNgrams: Ngram[]): Prediction[];
    /**
     * Generates an array of all possible contiguous n-grams within the sentence.
     * @deprecated use {@link Parser.ngrams} instead
     * @param {Token[]} sentence - the tokens in a sentence
     * @param {number} [maxNgramLength=3] - the maximum n-gram size to generate
     * @returns {Ngram[]}
     */
    static generateSentenceNgrams(sentence: Token[], maxNgramLength?: number): Ngram[];
    /**
     * Returns an array of n-grams of a particular size from a sentence
     * @deprecated use {@link Parser.sizedNgrams} instead
     * @param {Token[]} sentence - the sentence from which n-grams will be read
     * @param {number} ngramLength - the length of each n-gram.
     * @returns {Ngram[]}
     */
    static readSizedNgrams(sentence: Token[], ngramLength: number): Ngram[];
    /**
     * Calculates the weighted confidence score of a prediction
     * @param {Prediction} prediction - the prediction to score
     * @param {string[]} scoreKeys - the score keys to include in the calculation
     * @param {NumberObject} weights - the weights to influence the calculation
     * @return {number}
     */
    static calculateWeightedConfidence(prediction: Prediction, scoreKeys: string[], weights: NumberObject): number;
    /**
     * Scores the predictions and returns a filtered set of suggestions
     * (the declared return type is an array of predictions).
     * TODO: this should not be done in the engine because we don't know anything about the algorithms here.
     * @param {Prediction[]} predictions - the predictions to score
     * @param {AlignmentMemoryIndex} saIndex - the alignment memory index
     * @return {Prediction[]}
     */
    static calculateConfidence(predictions: Prediction[], saIndex: AlignmentMemoryIndex): Prediction[];
    /**
     * Returns an array of alignment suggestions
     * @param {Prediction[]} predictions - the predictions from which to base the suggestion
     * @param {number} [maxSuggestions] - the maximum number of suggestions to return
     * @return {Suggestion[]}
     */
    static suggest(predictions: Prediction[], maxSuggestions?: number): Suggestion[];
    /**
     * Sorts an array of suggestions by compound confidence
     * @param {Suggestion[]} suggestions - the suggestions to sort
     * @return {Suggestion[]}
     */
    static sortSuggestions(suggestions: Suggestion[]): Suggestion[];
    /**
     * Sorts an array of predictions by confidence
     * @param {Prediction[]} predictions - the predictions to sort
     * @return {Prediction[]}
     */
    static sortPredictions(predictions: Prediction[]): Prediction[];
    // Private members; this declaration file lists them without types.
    private maxTargetNgramLength;
    private maxSourceNgramLength;
    private nGramWarnings;
    private registeredAlgorithms;
    private registeredGlobalAlgorithms;
    private corpusIndex;
    private alignmentMemoryIndex;
    /**
     * Returns a list of algorithms that are registered in the engine
     * @return {Algorithm[]}
     */
    readonly algorithms: Algorithm[];
    /**
     * Creates a new engine. The options object and each of its fields are optional.
     *
     * NOTE(review): the option meanings below are inferred from the option and
     * private field names only - confirm against the implementation.
     *
     * @param {object} [options] - the engine configuration
     * @param {number} [options.sourceNgramLength] - presumably the maximum source n-gram length
     * @param {number} [options.targetNgramLength] - presumably the maximum target n-gram length
     * @param {boolean} [options.nGramWarnings] - presumably toggles n-gram related warnings.
     * Declared as `boolean` (rather than the literal type `true`) so that callers
     * can also pass `false`.
     */
    constructor({ sourceNgramLength, targetNgramLength, nGramWarnings }?: {
        sourceNgramLength?: number;
        targetNgramLength?: number;
        nGramWarnings?: boolean;
    });
    /**
     * Executes prediction algorithms on the unaligned sentence pair.
     * The sentence tokens should contain positional metrics for better accuracy.
     *
     * @param {Token[]} sourceSentence - the source sentence tokens.
     * @param {Token[]} targetSentence - the target sentence tokens.
     * @param {CorpusIndex} cIndex - the corpus index
     * @param {AlignmentMemoryIndex} saIndex - the alignment memory index
     * @param {Algorithm[]} algorithms - the prediction algorithms
     * @param {GlobalAlgorithm[]} globalAlgorithms - the global prediction algorithms
     * @return {Prediction[]}
     */
    performPrediction(sourceSentence: Token[], targetSentence: Token[], cIndex: CorpusIndex, saIndex: AlignmentMemoryIndex, algorithms: Algorithm[], globalAlgorithms: GlobalAlgorithm[]): Prediction[];
    /**
     * Generates the final confidence scores and sorts the predictions.
     * @param {Prediction[]} predictions
     * @return {Prediction[]}
     */
    score(predictions: Prediction[]): Prediction[];
    /**
     * Adds a new algorithm to the engine.
     * @param {AlgorithmInterface} algorithm - the algorithm to run with the engine.
     */
    registerAlgorithm(algorithm: AlgorithmInterface): void;
    /**
     * Appends new corpus to the engine.
     * @param {Token[][]} source - an array of tokenized source sentences.
     * @param {Token[][]} target - an array of tokenized target sentences.
     */
    addCorpus(source: Token[][], target: Token[][]): void;
    /**
     * Appends new alignment memory to the engine.
     * Adding alignment memory improves the quality of predictions.
     * @param {Alignment[]} alignmentMemory - a list of alignments
     */
    addAlignmentMemory(alignmentMemory: Alignment[]): void;
    /**
     * Performs the prediction calculations
     * @param {Token[]} sourceSentence
     * @param {Token[]} targetSentence
     * @return {Prediction[]}
     */
    run(sourceSentence: Token[], targetSentence: Token[]): Prediction[];
}