UNPKG

conllu-core

Version:
114 lines (113 loc) 5.98 kB
import { AdvanceDep, Comment, DepsRelation, EmptyToken, Feature, HeadId, Meta, NominalToken, Relation, Sentence, Token, TokenIdMap, TokenType, UPOS, XPOS } from ".";

/**
 * The policy for adjusting a `head` field that points to a token being merged.
 * Every other token has its head value adjusted accordingly, but a `head` that points
 * to a token being merged may need different treatment depending on user requirements.
 */
export declare enum HeadPolicy {
    /** Update every `head` that points to a token within the merge range to the merged `ID`. */
    Adjust = 0,
    /** Remove any `head` linked to a token within the merge range. */
    Remove = 1
}

/**
 * A builder used to assemble or edit a `Sentence`: append meta, comments and tokens,
 * set heads and deps by token index, insert, remove, merge or split tokens, and
 * finally produce a `Sentence` with `build()`.
 */
export declare class SentenceBuilder {
    meta: (Meta | Comment)[];
    tokens: Token[];
    id_map: TokenIdMap;

    /**
     * Make a builder out of an existing Sentence.
     * It makes a shallow copy of the existing sentence, so any modification made
     * through this builder is also immediately reflected on the original sentence.
     */
    static from(sentence: Sentence): SentenceBuilder;

    /** Append the given meta to this sentence. */
    push_meta(key: string, value: string): void;

    /** Append a comment to this sentence. */
    push_comment(text: string): void;

    /** Find a meta by the given `key`. */
    find_meta(key: string): Meta;

    /** Find the index of the meta with the given `key`. */
    find_meta_index(key: string): number;

    /** Append the given token to this sentence. */
    push_token(t: Token): void;

    /** Find a token by its `id`. */
    find_token_by_id(id: number | [number, number], type: TokenType): Token;

    /**
     * Get the id of the token at the given index, or undefined if the token at the
     * given index is a Compound token.
     *
     * NOTE(review): the declared return type does not include `undefined`, so callers
     * compiled with `strictNullChecks` are not warned about that case - confirm against
     * the implementation before relying on the result being defined.
     */
    get_id_by_index(index: number): number | [number, number];

    /**
     * Set the head and deprel fields of the token at `token_index` to the ID of the
     * token at `head_index`.
     */
    upsert_head_by_index(token_index: number, head_index: number, relation: Relation): void;

    /**
     * Add or replace a dep in the deps field of the token at the given token index.
     * The dep to be added/replaced is addressed by head index instead of ID.
     */
    upsert_dep_by_index(token_index: number, head_index: number, relation: DepsRelation): void;

    /** Insert a `token` at the given `index`. The index must be <= the number of existing tokens. */
    insert_token(token: Token, index: number): void;

    /**
     * Remove the token at the given index and update all dependencies on it based on
     * the given policy.
     * It returns the removed token without updating any of its field values.
     */
    remove_token(index: number, policy?: HeadPolicy): Token;

    /**
     * Merge tokens using index, not ID.
     * Both `from` and `to` are token indices.
     * The range is inclusive at both ends, so if `from = 1` and `to = 2`, the tokens
     * at index 1 and 2 are merged into one token.
     * The field values of the merged token depend on `policy`.
     */
    merge<X extends XPOS>(from: number, to: number, policy?: MergePolicy<X>): void;

    /**
     * Split the token at the given `index`. The `at` argument is a list of indices into
     * the `form` field of the token at which it is split. All other fields depend
     * on the `policy` argument.
     * All other dependencies are updated by shifting the IDs accordingly.
     *
     * The token at the given `index` must be either a NominalToken or an EmptyToken.
     */
    split(index: number, at: number[], policy?: SplitPolicy<XPOS>): void;

    /** Build a Sentence object out of this builder. */
    build(): Sentence;
}

/** Any token that is not a compound token. */
declare type NonCompound = (NominalToken | EmptyToken);

/**
 * A merging policy.
 *
 * It has the following attributes:
 * - `headPol` determines how all dependants shall be handled.
 *   The default value is `HeadPolicy.Adjust`.
 * - `lemma` is a callback that takes all tokens being merged and returns the merged lemma.
 *   The default value is every lemma concatenated together, or undefined.
 * - `upos` is a callback that takes all tokens being merged and returns the merged part-of-speech.
 *   The default value is the part-of-speech of the first token being merged.
 * - `xpos` is an optional callback that takes all tokens being merged and returns the merged
 *   language-specific part-of-speech.
 * - `feats` is a callback that takes all tokens being merged and returns the merged feature(s).
 *   The default value is a flattened merge of all unique features from every token being merged.
 * - `headRels` is a callback that takes all tokens being merged and returns the merged `head`
 *   and `deprel` fields.
 *   The default is the head/deprel of the first token being merged if there is no root `Relation`
 *   among the merged tokens; otherwise the merged token becomes the new root.
 * - `deps` is a callback that takes all tokens being merged and returns the merged `deps` field.
 *   The default is a merge of every unique deps entry of the tokens being merged.
 * - `misc` is a callback that takes all tokens being merged and returns the merged `misc` field.
 *   The default value is all unique entries of the flattened misc fields of every token.
 */
export declare class MergePolicy<X extends XPOS> {
    headPol: HeadPolicy;
    lemma: (tokens: NonCompound[]) => string;
    upos: (tokens: NonCompound[]) => UPOS;
    xpos?: (tokens: NonCompound[]) => X;
    feats: (tokens: NonCompound[]) => Feature[];
    headRels: (tokens: NominalToken[]) => [HeadId, Relation];
    deps: (tokens: NonCompound[]) => AdvanceDep[];
    misc: (tokens: NonCompound[]) => string[];
}

/**
 * Defines how each property of the tokens produced by a split is derived.
 * By default, all properties are copied from the original token.
 */
export declare class SplitPolicy<X extends XPOS> {
    lemma: (token: NonCompound) => string;
    upos: (token: NonCompound) => UPOS;
    xpos?: (token: NonCompound) => X;
    // Each callback receives the single token being split, hence `token` (singular).
    feats: (token: NonCompound) => Feature[];
    headRels: (token: NominalToken) => [HeadId, Relation];
    deps: (token: NonCompound) => AdvanceDep[];
    misc: (token: NonCompound) => string[];
}

export {};