geneea-nlp-client
Version:
The TypeScript Client for Geneea Interpretor G3 API.
118 lines (117 loc) • 5.47 kB
TypeScript
import { CharSpan } from "../../common/char-span";
import { UDep } from "../../common/udep";
import { UPos } from "../../common/upos";
import { Node } from "./node";
import { Paragraph } from "./paragraph";
import { Sentence } from "./sentence";
/**
* A token including basic morphological and syntactic information.
* A token is similar to a word, but includes punctuation.
* Tokens have a zero-based index reflecting their position within their sentence.
* The morphological and syntactical features might be null (deepLemma, lemma, morphTag, pos, fnc, parent),
* or empty (children) if not requested or supported.
*/
declare class Token extends Node<Token> {
/** Identifier of this token. */
readonly id: string;
/** Zero-based index of this token within its sentence. */
readonly idx: number;
/** Text of this token, possibly after correction. */
text: string;
/** Character span within the paragraph. */
charSpan: CharSpan;
/** Text of this token in the original paragraph. */
origText: string;
/** Character span within the original paragraph. */
origCharSpan: CharSpan;
/** Lemma of the token, e.g. bezpecny. Null if not requested/supported. */
deepLemma: string | null;
/** Simple lemma of the token, e.g. nejnebezpecnejsi (in Cz, includes negation and grade). Null if not requested/supported. */
lemma: string | null;
/** Google universal tag (part of speech). Null if not requested/supported. */
pos: UPos | null;
/** Morphological tag, e.g. AAMS1-...., VBD, ... Null if not requested/supported. */
morphTag: string | null;
/**
* Universal and custom features.
* NOTE(review): presumably keyed by feature name, including the FEAT_* keys declared below - confirm.
*/
feats: Map<string, string>;
/** Label of the dependency edge. Null if not requested/supported. */
fnc: UDep | null;
/** Dependency sub-function. Null if not requested/supported. */
subFnc: string | null;
/**
* Lemma info features, a list of strings.
* NOTE(review): `feats` values are typed as plain strings; how the list is encoded is not visible here - confirm.
*/
static readonly FEAT_LEMMA_INFO = "lemmaInfo";
/** Key presence signifies it is a negated word, value = true. */
static readonly FEAT_NEGATED = "negated";
/** Key presence signifies it is an unknown word, value = true. */
static readonly FEAT_UNKNOWN = "unknown";
/** Not callable from outside the class; use the `Token.of` factory method instead. */
private constructor();
/**
* Token factory method, public constructor.
* Every argument after `charSpan` is optional and may also be passed as null.
* NOTE(review): `origText` and `origCharSpan` are non-nullable on the instance, so the implementation
* presumably falls back to `text` and `charSpan` when they are omitted - confirm.
* @param id Identifier of the token.
* @param idx Zero-based index of the token within its sentence.
* @param text Text of the token, possibly after correction.
* @param charSpan Character span within the paragraph.
* @param origText Text of the token in the original paragraph.
* @param origCharSpan Character span within the original paragraph.
* @param deepLemma Lemma of the token.
* @param lemma Simple lemma of the token.
* @param pos Google universal tag.
* @param morphTag Morphological tag.
* @param feats Universal and custom features.
* @param fnc Label of the dependency edge.
* @param subFnc Dependency sub-function.
* @returns The newly created token.
*/
static of(id: string, idx: number, text: string, charSpan: CharSpan, origText?: string | null, origCharSpan?: CharSpan | null, deepLemma?: string | null, lemma?: string | null, pos?: UPos | null, morphTag?: string | null, feats?: Map<string, string> | null, fnc?: UDep | null, subFnc?: string | null): Token;
/** True iff the token form contains a negation prefix. */
isNegated(): boolean;
/** True iff the token is unknown to the lemmatizer. The lemma provided is the same as the token itself. */
isUnknown(): boolean;
/**
* Token following or preceding this token within the sentence.
* @param offset Relative offset. The following tokens have a positive offset,
* preceding a negative one. The next token has offset = 1.
* @returns The token at the relative offset or null if the offset is invalid.
*/
offsetToken(offset: number): Token | null;
/** The previous token or null if this token is sentence initial. */
previous(): Token | null;
/** The next token or null if this token is sentence final. */
next(): Token | null;
/**
* Full dependency function in the format `{fnc}:{subFnc}` if the sub-function is present.
* Otherwise it's the same as `fnc`.
*/
get fullFnc(): string | null;
/** Converts the token to a default non-recursive string: index + text. */
toSimpleString(): string;
/**
* Converts the token to a non-recursive string: index + [text] + [pos] + [fnc].
* @param text Whether to include the token text.
* @param pos Whether to include the part-of-speech tag.
* @param fnc Whether to include the dependency function.
*/
toStringWith(text: boolean, pos: boolean, fnc: boolean): string;
/** String representation of this token; the exact format is defined by the implementation. */
toString(): string;
}
/**
* Tokens within a single sentence; ordered by word-order; non-empty, continuous or discontinuous.
*/
declare class TokenSupport {
/** The tokens of this support. */
tokens: Token[];
/** Is this support a continuous sequence of tokens, i.e. a token span? */
isContinuous: boolean;
/**
* Creates a token support from the given tokens and a continuity flag.
* NOTE(review): unlike `TokenSupport.of`, this presumably expects the tokens to be already
* ordered and the flag to be correct - confirm against the implementation.
* @param tokens The tokens of this support.
* @param isContinuous Is this support a continuous sequence of tokens, i.e. a token span?
*/
constructor(tokens: Token[], isContinuous: boolean);
/**
* Creates a TokenSupport object from a list of tokens.
* @param tokens Non-empty list of tokens (no need for them to be sorted).
* @returns The token support covering the given tokens.
*/
static of(tokens: Token[]): TokenSupport;
/** The sentence the tokens of this support belong to. */
get sentence(): Sentence;
/** The paragraph enclosing the tokens of this support. */
get paragraph(): Paragraph;
/** The first token of this support. */
get first(): Token;
/** The last token of this support. */
get last(): Token;
/** Size of this support; presumably the number of tokens - confirm. */
get size(): number;
/** IDs associated with this support; presumably the `id` of each token - confirm. */
get ids(): string[];
/**
* The character span between the first and last token relative to the enclosing paragraph;
* for discontinuous support this includes intervening gaps.
*/
get charSpan(): CharSpan;
/** Offset of the first character of these tokens within the enclosing paragraph. */
get firstCharParaOffset(): number;
/** Offset of the last character of these tokens within the enclosing paragraph. */
get lastCharParaOffset(): number;
/**
* Substring of a full text as denoted by this support (before correction).
* For discontinuous supports, the result includes the intervening gaps.
*/
get text(): string;
/** Breaks this token support into continuous sub-sequences of tokens. */
spans(): TokenSupport[];
/** The coverage texts of each of the continuous spans, ordered by word-order. */
textSpans(): string[];
/** String representation of this support; the exact format is defined by the implementation. */
toString(): string;
}
export { Token, TokenSupport };