wordmap
Version:
Multi-Lingual Word Alignment Prediction
39 lines (38 loc) • 1.85 kB
TypeScript
import { Token } from "wordmap-lexer";
import PermutationIndex from "./index/PermutationIndex";
import Alignment from "./structures/Alignment";
import Ngram from "./structures/Ngram";
/**
* A collection of static parsing functions for building n-grams from tokenized
* sentences and for pairing source n-grams with target n-grams.
* This is a type declaration only: the method bodies are not part of this file.
*/
export default class Parser {
/**
* Returns an array of n-grams of a particular size from a sentence.
* @param {Array<Token>} sentence - the sentence from which n-grams will be read
* @param {number} ngramLength - the length of each n-gram.
* @returns {Array<Ngram>} the n-grams of length `ngramLength` read from the sentence
*/
static sizedNgrams(sentence: Token[], ngramLength: number): Ngram[];
/**
* Generates an array of all possible contiguous n-grams within the sentence.
* NOTE(review): presumably this covers every n-gram size up to `maxNgramLength`;
* confirm against the implementation.
* @param {Array<Token>} sentence - the tokens in a sentence
* @param {number} [maxNgramLength=3] - the maximum n-gram size to generate.
* Optional; the documented default is 3.
* @returns {Ngram[]} the generated n-grams
*/
static ngrams(sentence: Token[], maxNgramLength?: number): Ngram[];
/**
* Generates an array of all possible alignments between two sets of n-grams.
* @deprecated use {@link indexAlignmentPermutations} instead (it's faster).
* @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
* @param {Ngram[]} targetNgrams - every possible n-gram in the target text
* @returns {Alignment[]} the alignments between the source and target n-grams
*/
static alignments(sourceNgrams: Ngram[], targetNgrams: Ngram[]): Alignment[];
/**
* Indexes all possible alignment permutations between two sets of n-grams.
* Nothing is returned; the permutations are delivered through `index`.
* @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
* @param {Ngram[]} targetNgrams - every possible n-gram in the target text
* @param {PermutationIndex} index - the index that will receive the permutations
*/
static indexAlignmentPermutations(sourceNgrams: Ngram[], targetNgrams: Ngram[], index: PermutationIndex): void;
}