conllu-core
Version:
A core type to handle CoNLL-U format
74 lines (73 loc) • 4.18 kB
TypeScript
import { AdvanceDep, EmptyId, EmptyToken, Feature, HeadId, NominalToken, Relation, Sentence, UPOS, XPOS } from ".";
/**
* The policy used to fix up a `head` field that points to a token being merged.
* Every other token has its `head` value adjusted accordingly, but a `head` that points
* to a token inside the merge range may need different treatment depending on the
* caller's requirements. This enum selects that treatment.
*/
export declare enum HeadPolicy {
/** Re-point every `head` that targets a token within the merge range to the `ID` of the merged token. */
Adjust = 0,
/** Remove every `head` that is linked to a token within the merge range. */
Remove = 1
}
/** A token that can take part in a merge: either a `NominalToken` or an `EmptyToken`. */
declare type Mergeable = (NominalToken | EmptyToken);
/**
* A merging policy: one strategy per token field, describing how the merged token is built.
*
* It has the following attributes:
* - `headPol` determines how dependants of the merged tokens are handled.
* The default value is `HeadPolicy.Adjust`.
* - `lemma` is a callback that takes all tokens being merged and returns the merged lemma.
* The default value is every lemma concatenated together, or undefined.
* NOTE(review): the declared return type is `string`, not `string | undefined` — confirm
* what the default callback returns when no lemma is available.
* - `upos` is a callback that takes all tokens being merged and returns the merged part-of-speech.
* The default value is the part-of-speech of the first token being merged.
* - `xpos` is an optional callback that takes all tokens being merged and returns the merged
* language-specific part-of-speech, typed by the class type parameter `X`.
* - `feats` is a callback that takes all tokens being merged and returns the merged feature(s).
* The default value is a flattened merge of all unique features from every token being merged.
* - `headRels` is a callback that takes the tokens being merged and returns the merged `head` and
* `deprel` fields as a `[HeadId, Relation]` pair. Unlike the other callbacks, it is declared to
* receive `NominalToken[]` only.
* The default is the head/deprel of the first token being merged if there is no root `Relation`
* among the merged tokens; otherwise the merged token becomes the new root.
* - `deps` is a callback that takes all tokens being merged and returns the merged `deps` field.
* The default is a merge of every unique `deps` entry of the tokens being merged.
* - `misc` is a callback that takes all tokens being merged and returns the merged `misc` field.
* The default value is all unique entries of the flat-mapped `misc` fields of every token.
*/
export declare class MergePolicy<X extends XPOS> {
headPol: HeadPolicy;
lemma: (tokens: Mergeable[]) => string;
upos: (tokens: Mergeable[]) => UPOS;
xpos?: (tokens: Mergeable[]) => X;
feats: (tokens: Mergeable[]) => Feature[];
headRels: (tokens: NominalToken[]) => [HeadId, Relation];
deps: (tokens: Mergeable[]) => AdvanceDep[];
misc: (tokens: Mergeable[]) => string[];
}
/**
* The id of a token that can be merged: either a plain `HeadId` or a `[HeadId, EmptyId]` pair.
* NOTE(review): the pair form presumably addresses an `EmptyToken` (e.g. id `0.1`) — confirm
* against the implementation.
*/
export declare type MergeableId = HeadId | [HeadId, EmptyId];
/**
* Perform token merging.
* It is an error to merge tokens of different types.
* For example, you can **not** merge `from` = 0.1 and `to` = 2.
* If merging causes a `CompoundToken` that refers to a merged token to become invalid,
* that `CompoundToken` is removed automatically.
*
* Merging an invalid `Sentence` results in undefined behavior.
* The caller should ensure that the `Sentence` is valid; the `validate` method of `Sentence`
* can be used for such validation.
*
* You can merge multiple `EmptyToken`s.
* You can merge multiple `NominalToken`s.
* If there is an `EmptyToken` or a `CompoundToken` between the `NominalToken`s being merged,
* it is removed automatically.
* You can never merge a `CompoundToken`.
*
* The result may contain an invalid `EmptyToken` if any `EmptyToken` has a single dependency that
* depends on a token being merged. Such an invalid `EmptyToken` is retained. It is the user's
* responsibility to fix the dependency of every `EmptyToken` that becomes invalid.
*
* NOTE(review): the function is declared to return `void`, so the result is presumably applied to
* `sentence` in place — confirm against the implementation.
*
* @param sentence The `Sentence` whose tokens are merged.
* @param from The first token to be merged. It is an `id`, not an `index`, of the token.
* @param to The inclusive id of the last token to be merged. It is an `id`, not an `index`, of the token.
* @param policy Specifies how the merged token is built and how a head that points to any token being
* merged is treated. The documented default head policy is `Adjust`.
* NOTE(review): this parameter is declared as required, so "default" presumably refers to the
* defaults of a `MergePolicy` instance rather than to omitting the argument — confirm.
*/
export declare function mergeTokens<X extends XPOS>(sentence: Sentence, from: MergeableId, to: MergeableId, policy: MergePolicy<X>): void;
// Explicit empty export: marks this declaration file as a module and keeps
// non-exported declarations such as `Mergeable` private to it.
export {};