wordmap
Version:
Multi-Lingual Word Alignment Prediction
108 lines (107 loc) • 2.92 kB
TypeScript
import { Token } from "wordmap-lexer";
/**
* Represents a set of zero or more tokens from a text.
*
* This is a declaration only; the implementation is not visible here, so the
* notes below marked "presumably"/"TODO confirm" are assumptions to verify
* against the implementation.
*/
export default class Ngram {
/**
* NOTE(review): undocumented in the original declaration. Presumably the
* index of this particular occurrence of the n-gram within its sentence —
* TODO confirm against the implementation.
*/
occurrence: number;
/**
* NOTE(review): undocumented in the original declaration. Presumably the
* total number of times this n-gram occurs within its sentence —
* TODO confirm against the implementation.
*/
occurrences: number;
/**
* Internal token storage. Presumably the list handed to the constructor and
* returned by {@link Ngram.getTokens} — TODO confirm.
*/
private tokens;
/**
* Internal cache. Presumably backs {@link Ngram.key} and is filled by
* `cacheKeys` — TODO confirm.
*/
private cachedKey;
/**
* Internal cache. Presumably backs {@link Ngram.lemmaKey} and is filled by
* `cacheKeys` — TODO confirm.
*/
private cachedLemmaKey;
/**
* The length of the n-gram, in units of {@link Token}.
*/
readonly tokenLength: number;
/**
* The length of the n-gram, in characters.
* This does not account for whitespace.
*/
readonly characterLength: number;
/**
* The position (in units of {@link Token}) at which this n-gram appears in the sentence.
*/
readonly tokenPosition: number;
/**
* The length (in units of {@link Token}) of the sentence in which this n-gram occurs.
*/
readonly sentenceTokenLength: number;
/**
* The length (in characters) of the sentence in which this n-gram occurs.
* This includes whitespace in the sentence.
*/
readonly sentenceCharacterLength: number;
/**
* The position (in characters) at which this n-gram appears in the sentence.
*/
readonly characterPosition: number;
/**
* The n-gram key.
*/
readonly key: string;
/**
* The lemma-based n-gram key.
* Typed as possibly `undefined`; callers must handle the missing case.
* NOTE(review): presumably undefined when the tokens carry no lemma — TODO confirm.
*/
readonly lemmaKey: string | undefined;
/**
* Creates an n-gram from a list of tokens.
* @param tokens - the tokens of which this n-gram is composed. Optional;
* the original documentation states the default is an empty list.
*/
constructor(tokens?: Token[]);
/**
* Checks if this n-gram contains exactly one token.
* @returns `true` if the n-gram is made of one token
*/
isUnigram(): boolean;
/**
* Checks if this n-gram contains exactly two tokens.
* @returns `true` if the n-gram is made of two tokens
*/
isBigram(): boolean;
/**
* Checks if this n-gram contains exactly three tokens.
* @returns `true` if the n-gram is made of three tokens
*/
isTrigram(): boolean;
/**
* Checks if this n-gram is an empty placeholder.
* NOTE(review): presumably this means the n-gram has zero tokens — TODO confirm.
* @returns `true` if the n-gram is an empty placeholder
*/
isNull(): boolean;
/**
* Returns the tokens in this n-gram.
* NOTE(review): whether this is the internal array or a copy is not visible
* here; avoid mutating the result until confirmed.
* @returns the tokens of which this n-gram is composed
*/
getTokens(): Token[];
/**
* Returns a human-readable form of the n-gram.
* @returns the printable representation
*/
toString(): string;
/**
* Outputs the n-gram to JSON.
* @param verbose - when set, print full metadata
* @returns a JSON-serializable object describing the n-gram
*/
toJSON(verbose?: boolean): object;
/**
* Checks if two n-grams are equal.
* NOTE(review): the exact criteria, and how they differ from
* {@link Ngram.looksLike}, are not visible in this declaration — TODO confirm.
* @param ngram - the n-gram to compare against
* @returns `true` if the n-grams are equal
*/
equals(ngram: Ngram): boolean;
/**
* Checks if two n-grams look the same.
* NOTE(review): presumably a looser comparison than {@link Ngram.equals}
* (e.g. ignoring position) — TODO confirm.
* @param ngram - the n-gram to compare against
* @returns `true` if the n-grams look the same
*/
looksLike(ngram: Ngram): boolean;
/**
* Caches the keys if they have not already been generated.
*/
private cacheKeys;
}