wordmap

Version:

Multi-Lingual Word Alignment Prediction

39 lines (38 loc) • 1.85 kB

TypeScript

import { Token } from "wordmap-lexer";
import PermutationIndex from "./index/PermutationIndex";
import Alignment from "./structures/Alignment";
import Ngram from "./structures/Ngram";

/**
 * A collection of parsing functions.
 *
 * Every member is static, so the class is used as a namespace and is not
 * meant to be instantiated. Only the signatures are declared here; the
 * implementations are not part of this file.
 */
export default class Parser {
    /**
     * Returns an array of n-grams of a particular size from a sentence.
     * @param {Array<Token>} sentence - the sentence from which n-grams will be read
     * @param {number} ngramLength - the length of each n-gram
     * @returns {Array<Ngram>} the n-grams of the requested length
     */
    static sizedNgrams(sentence: Token[], ngramLength: number): Ngram[];

    /**
     * Generates an array of all possible contiguous n-grams within the sentence.
     * @param {Array<Token>} sentence - the tokens in a sentence
     * @param {number} [maxNgramLength=3] - the maximum n-gram size to generate
     * @returns {Array<Ngram>} the generated n-grams
     */
    static ngrams(sentence: Token[], maxNgramLength?: number): Ngram[];

    /**
     * Generates an array of all possible alignments between two sets of n-grams.
     * @deprecated use {@link indexAlignmentPermutations} instead (it's faster).
     * @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
     * @param {Ngram[]} targetNgrams - every possible n-gram in the target text
     * @returns {Alignment[]} the generated alignments
     */
    static alignments(sourceNgrams: Ngram[], targetNgrams: Ngram[]): Alignment[];

    /**
     * Indexes all possible alignment permutations between two sets of n-grams.
     *
     * Nothing is returned; the permutations are delivered through `index`.
     * @param {Ngram[]} sourceNgrams - every possible n-gram in the source text
     * @param {Ngram[]} targetNgrams - every possible n-gram in the target text
     * @param {PermutationIndex} index - the index that will receive the permutations
     */
    static indexAlignmentPermutations(sourceNgrams: Ngram[], targetNgrams: Ngram[], index: PermutationIndex): void;
}