geneea-nlp-client

import { CharSpan } from "../../common/char-span";
import { UDep } from "../../common/udep";
import { UPos } from "../../common/upos";
import { Node } from "./node";
import { Paragraph } from "./paragraph";
import { Sentence } from "./sentence";

/**
 * A token including basic morphological and syntactic information.
 * A token is similar to a word, but includes punctuation.
 * Tokens have a zero-based index reflecting their position within their sentence.
 * The morphological and syntactical features might be null (deepLemma, lemma, morphTag, pos, fnc, parent),
 * or empty (children) if not requested or supported.
 *
 * Instances cannot be created with `new` (the constructor is private); use {@link Token.of}.
 */
declare class Token extends Node<Token> {
    /** Identifier of this token. */
    readonly id: string;
    /** Zero-based index of this token; per the class description it reflects the position within the sentence. */
    readonly idx: number;
    /** Text of this token, possibly after correction. */
    text: string;
    /** Character span within the paragraph. */
    charSpan: CharSpan;
    /** Text of this token in the original paragraph. */
    origText: string;
    /** Character span within the original paragraph. */
    origCharSpan: CharSpan;
    /** Lemma of the token e.g. bezpecny. Null if not requested/supported. */
    deepLemma: string | null;
    /** Simple lemma of the token, e.g. nejnebezpecnejsi (in Cz, includes negation and grade). Null if not requested/supported. */
    lemma: string | null;
    /** Google universal tag. Null if not requested/supported. */
    pos: UPos | null;
    /** Morphological tag, e.g. AAMS1-...., VBD, ... Null if not requested/supported. */
    morphTag: string | null;
    /** Universal and custom features. */
    feats: Map<string, string>;
    /** Label of the dependency edge. Null if not requested/supported. */
    fnc: UDep | null;
    /** Dependency sub-function. Null if not requested/supported. */
    subFnc: string | null;

    /** Lemma info features, a list of strings. */
    static readonly FEAT_LEMMA_INFO = "lemmaInfo";
    /** Key presence signifies it is a negated word, value = true. */
    static readonly FEAT_NEGATED = "negated";
    /** Key presence signifies it is an unknown word, value = true. */
    static readonly FEAT_UNKNOWN = "unknown";

    // Private: construction goes through the static factory `of`.
    private constructor();

    /**
     * Token factory method, public constructor.
     *
     * Only `id`, `idx`, `text` and `charSpan` are required; every other argument
     * may be omitted or passed as null. The parameter names mirror the fields of
     * the same name on this class.
     * NOTE(review): the values used for omitted/null arguments are not visible in
     * this declaration - confirm against the implementation.
     */
    static of(id: string, idx: number, text: string, charSpan: CharSpan, origText?: string | null, origCharSpan?: CharSpan | null, deepLemma?: string | null, lemma?: string | null, pos?: UPos | null, morphTag?: string | null, feats?: Map<string, string> | null, fnc?: UDep | null, subFnc?: string | null): Token;

    /** True iff the token form contains a negation prefix. */
    isNegated(): boolean;

    /** True iff the token is unknown to the lemmatizer. The lemma provided is the same as the token itself. */
    isUnknown(): boolean;

    /**
     * Token following or preceding this token within the sentence.
     * @param offset Relative offset. The following tokens have a positive offset,
     *      preceding a negative one. The next token has offset = 1.
     * @returns The token at the relative offset or null if the offset is invalid.
     */
    offsetToken(offset: number): Token | null;

    /** The previous token or null if this token is sentence initial. */
    previous(): Token | null;

    /** The next token or null if this token is sentence final. */
    next(): Token | null;

    /**
     * Full dependency function in the format `{fnc}:{subFnc}` if the sub-function is present.
     * Otherwise it's the same as `fnc`.
     */
    get fullFnc(): string | null;

    /** Converts the token to a default non-recursive string: index + text. */
    toSimpleString(): string;

    /**
     * Converts the token to a non-recursive string: index + [text] + [pos] + [fnc].
     * The three flags presumably toggle the corresponding bracketed parts - confirm against the implementation.
     */
    toStringWith(text: boolean, pos: boolean, fnc: boolean): string;

    /** String representation of this token; the exact format is not visible in this declaration. */
    toString(): string;
}

/**
 * Tokens within a single sentence; ordered by word-order; non-empty, continuous or discontinuous.
 */
declare class TokenSupport {
    /** The tokens of this support. */
    tokens: Token[];
    /** Is this support a continuous sequence of tokens, i.e. a token span? */
    isContinuous: boolean;

    /**
     * Creates a support from its tokens and an explicit continuity flag.
     * @param tokens The tokens of this support.
     * @param isContinuous Is this support a continuous sequence of tokens, i.e. a token span?
     */
    constructor(tokens: Token[], isContinuous: boolean);

    /**
     * Creates a TokenSupport object from a list of tokens.
     * @param tokens Non-empty list of tokens (no need for them to be sorted).
     */
    static of(tokens: Token[]): TokenSupport;

    /** The sentence of this support (per the class description, all tokens lie within a single sentence). */
    get sentence(): Sentence;

    /** The paragraph of this support; presumably the paragraph enclosing the sentence - confirm against the implementation. */
    get paragraph(): Paragraph;

    /** First token of this support; presumably first in word-order - confirm against the implementation. */
    get first(): Token;

    /** Last token of this support; presumably last in word-order - confirm against the implementation. */
    get last(): Token;

    /** Size of this support; presumably the number of tokens - confirm against the implementation. */
    get size(): number;

    /** Ids of this support; presumably the ids of its tokens - confirm against the implementation. */
    get ids(): string[];

    /**
     * The character span between the first and last token relative to the enclosing paragraph;
     * for discontinuous support this includes intervening gaps.
     */
    get charSpan(): CharSpan;

    /** Offset of the first character of these tokens within the enclosing paragraph. */
    get firstCharParaOffset(): number;

    /** Offset of the last character of these tokens within the enclosing paragraph. */
    get lastCharParaOffset(): number;

    /**
     * Substring of a full text as denoted by this support (before correction).
     * For discontinuous supports, the result includes the intervening gaps.
     */
    get text(): string;

    /** Breaks this token support into continuous sub-sequences of tokens. */
    spans(): TokenSupport[];

    /** The coverage texts of each of the continuous spans, ordered by word-order. */
    textSpans(): string[];

    /** String representation of this support; the exact format is not visible in this declaration. */
    toString(): string;
}

export { Token, TokenSupport };