/*
 * wordmap
 * Version:
 * Multi-Lingual Word Alignment Prediction
 * 134 lines (133 loc) • 5.88 kB
 * TypeScript
 */
import { Token } from "wordmap-lexer";
import Algorithm from "./Algorithm";
import AlgorithmInterface from "./AlgorithmInterface";
import GlobalAlgorithm from "./GlobalAlgorithm";
import AlignmentMemoryIndex from "./index/AlignmentMemoryIndex";
import CorpusIndex from "./index/CorpusIndex";
import NumberObject from "./index/NumberObject";
import Alignment from "./structures/Alignment";
import Ngram from "./structures/Ngram";
import Prediction from "./structures/Prediction";
import Suggestion from "./structures/Suggestion";
/**
 * Represents a multi-lingual word alignment prediction engine.
 *
 * This is a declaration-only view of the class: it lists the public static
 * helpers, the instance API and the names of the private fields, but no
 * implementation details.
 */
export default class Engine {
    /**
     * Generates an array of all possible alignment predictions
     * @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
     * @param {Ngram[]} targetNgrams - every possible n-gram in the target text
     * @return {Prediction[]}
     */
    static generatePredictions(sourceNgrams: Ngram[], targetNgrams: Ngram[]): Prediction[];
    /**
     * Generates an array of all possible contiguous n-grams within the sentence.
     * @deprecated use {@link Parser.ngrams} instead
     * @param {Token[]} sentence - the tokens in a sentence
     * @param {number} [maxNgramLength=3] - the maximum n-gram size to generate
     * @returns {Ngram[]}
     */
    static generateSentenceNgrams(sentence: Token[], maxNgramLength?: number): Ngram[];
    /**
     * Returns an array of n-grams of a particular size from a sentence
     * @deprecated use {@link Parser.sizedNgrams} instead
     * @param {Token[]} sentence - the sentence from which n-grams will be read
     * @param {number} ngramLength - the length of each n-gram.
     * @returns {Ngram[]}
     */
    static readSizedNgrams(sentence: Token[], ngramLength: number): Ngram[];
    /**
     * Calculates the weighted confidence score of a prediction
     * @param {Prediction} prediction - the prediction to score
     * @param {string[]} scoreKeys - the score keys to include in the calculation
     * @param {NumberObject} weights - the weights to influence the calculation
     * @return {number}
     */
    static calculateWeightedConfidence(prediction: Prediction, scoreKeys: string[], weights: NumberObject): number;
    /**
     * Scores the predictions and returns a filtered set of them.
     * TODO: this should not be done in the engine because we don't know anything about the algorithms here.
     * @param {Prediction[]} predictions - the predictions to score
     * @param {AlignmentMemoryIndex} saIndex - the alignment memory index consulted while scoring
     * @return {Prediction[]}
     */
    static calculateConfidence(predictions: Prediction[], saIndex: AlignmentMemoryIndex): Prediction[];
    /**
     * Returns an array of alignment suggestions
     * @param {Prediction[]} predictions - the predictions from which to base the suggestion
     * @param {number} [maxSuggestions] - the maximum number of suggestions to return
     * @return {Suggestion[]}
     */
    static suggest(predictions: Prediction[], maxSuggestions?: number): Suggestion[];
    /**
     * Sorts an array of suggestions by compound confidence
     * @param {Suggestion[]} suggestions - the suggestions to sort
     * @return {Suggestion[]}
     */
    static sortSuggestions(suggestions: Suggestion[]): Suggestion[];
    /**
     * Sorts an array of predictions by confidence
     * @param {Prediction[]} predictions - the predictions to sort
     * @return {Prediction[]}
     */
    static sortPredictions(predictions: Prediction[]): Prediction[];
    // Private state. Types are erased in the declaration output.
    // NOTE(review): the n-gram fields presumably mirror the constructor
    // options of similar name - confirm against the implementation.
    private maxTargetNgramLength;
    private maxSourceNgramLength;
    private nGramWarnings;
    private registeredAlgorithms;
    private registeredGlobalAlgorithms;
    private corpusIndex;
    private alignmentMemoryIndex;
    /**
     * Returns a list of algorithms that are registered in the engine
     * @return {Algorithm[]}
     */
    readonly algorithms: Algorithm[];
    /**
     * Creates a new engine. The options object and each of its fields are optional.
     * @param {number} [sourceNgramLength] - n-gram length setting for the source text
     * @param {number} [targetNgramLength] - n-gram length setting for the target text
     * @param {boolean} [nGramWarnings] - n-gram warnings flag; typed as a plain
     * boolean so that callers can also pass `false`
     */
    constructor({ sourceNgramLength, targetNgramLength, nGramWarnings }?: {
        sourceNgramLength?: number;
        targetNgramLength?: number;
        nGramWarnings?: boolean;
    });
    /**
     * Executes prediction algorithms on the unaligned sentence pair.
     * The sentence tokens should contain positional metrics for better accuracy.
     *
     * @param {Token[]} sourceSentence - the source sentence tokens.
     * @param {Token[]} targetSentence - the target sentence tokens.
     * @param {CorpusIndex} cIndex - the corpus index supplied for this run
     * @param {AlignmentMemoryIndex} saIndex - the alignment memory index supplied for this run
     * @param {Algorithm[]} algorithms - the algorithms supplied for this run
     * @param {GlobalAlgorithm[]} globalAlgorithms - the global algorithms supplied for this run
     * @return {Prediction[]}
     */
    performPrediction(sourceSentence: Token[], targetSentence: Token[], cIndex: CorpusIndex, saIndex: AlignmentMemoryIndex, algorithms: Algorithm[], globalAlgorithms: GlobalAlgorithm[]): Prediction[];
    /**
     * Generates the final confidence scores and sorts the predictions.
     * @param {Prediction[]} predictions - the predictions to score and sort
     * @return {Prediction[]}
     */
    score(predictions: Prediction[]): Prediction[];
    /**
     * Adds a new algorithm to the engine.
     * @param {AlgorithmInterface} algorithm - the algorithm to run with the engine.
     */
    registerAlgorithm(algorithm: AlgorithmInterface): void;
    /**
     * Appends new corpus to the engine.
     * @param {Token[][]} source - an array of tokenized source sentences.
     * @param {Token[][]} target - an array of tokenized target sentences.
     */
    addCorpus(source: Token[][], target: Token[][]): void;
    /**
     * Appends new alignment memory to the engine.
     * Adding alignment memory improves the quality of predictions.
     * @param {Alignment[]} alignmentMemory - a list of alignments
     */
    addAlignmentMemory(alignmentMemory: Alignment[]): void;
    /**
     * Performs the prediction calculations
     * @param {Token[]} sourceSentence - the source sentence tokens
     * @param {Token[]} targetSentence - the target sentence tokens
     * @return {Prediction[]}
     */
    run(sourceSentence: Token[], targetSentence: Token[]): Prediction[];
}