lindera-wasm
Version:
A morphological analysis library for WebAssembly.
410 lines (380 loc) • 15.6 kB
TypeScript
/* tslint:disable */
/* eslint-disable */
/**
* Compression algorithm for dictionary data.
*
* Numeric enum whose member values are fixed explicitly below.
* NOTE(review): the raw module exports `jsmetadata_compress_algorithm` /
* `jsmetadata_set_compress_algorithm` exchange a plain number; presumably these
* values are what crosses that boundary, so they should not be renumbered — confirm.
*/
export enum CompressionAlgorithm {
/**
* Deflate (numeric value 0).
*/
Deflate = 0,
/**
* Zlib (numeric value 1).
*/
Zlib = 1,
/**
* Gzip (numeric value 2).
*/
Gzip = 2,
/**
* Raw (numeric value 3).
* NOTE(review): presumably means the data is stored without compression — confirm.
*/
Raw = 3,
}
/**
* A morphological analysis dictionary.
*
* The constructor is private, so instances cannot be created with `new`.
* In this file they are returned by `loadDictionary` / `load_dictionary`
* and accepted by the `Tokenizer` constructor.
*/
export class Dictionary {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_dictionary_free`); do not use the
* instance afterwards — confirm against the generated JS glue.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
}
/**
* Field definition in dictionary schema.
*
* The constructor is private, so instances cannot be created with `new`
* from TypeScript.
*/
export class FieldDefinition {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_fielddefinition_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* Optional description of the field; reads yield `undefined` rather than
* `null` when there is none.
*/
get description(): string | undefined;
/**
* Sets the description. Both `null` and `undefined` are accepted in addition
* to a string (presumably both clear the description — confirm).
*/
set description(value: string | null | undefined);
/**
* Kind of data this field holds; see `FieldType`.
*/
field_type: FieldType;
/**
* Index of the field.
* NOTE(review): presumably its position within the schema — confirm whether zero-based.
*/
index: number;
/**
* Name of the field.
*/
name: string;
}
/**
* Field type in dictionary schema.
*
* Numeric enum with explicitly assigned values; used as the type of
* `FieldDefinition.field_type`.
*/
export enum FieldType {
/**
* Surface form (word text)
*/
Surface = 0,
/**
* Left context ID for morphological analysis
*/
LeftContextId = 1,
/**
* Right context ID for morphological analysis
*/
RightContextId = 2,
/**
* Word cost (used in path selection)
*/
Cost = 3,
/**
* Custom field (morphological features)
*/
Custom = 4,
}
/**
* Error type for Lindera operations.
*
* As declared here the class does not extend the built-in `Error`, so these
* typings do not guarantee `instanceof Error` or a `stack` property; read
* `message` instead. The constructor is private, so instances cannot be
* created with `new` from TypeScript.
*/
export class LinderaError {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_linderaerror_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* Error message text. Declared as a writable property.
*/
message: string;
}
/**
* Dictionary metadata configuration.
*
* Accepted by `buildDictionary`, `buildUserDictionary`, `loadUserDictionary`
* and their snake_case counterparts.
*
* NOTE(review): the constructor is private and no declaration in this file
* returns a `Metadata`, although the raw module exports `jsmetadata_new`,
* `jsmetadata_createDefault` and several `jsmetadata_*` accessors. Confirm how
* callers are expected to obtain an instance.
*/
export class Metadata {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_metadata_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
}
/**
* Tokenization mode.
*
* Determines how text is segmented into tokens.
*
* NOTE(review): the `Tokenizer` constructor and `TokenizerBuilder.setMode`
* declared in this file take the mode as a `string`, not as this enum; the
* mapping between the two is not visible here — confirm before passing enum
* values to those APIs.
*/
export enum Mode {
/**
* Standard tokenization based on dictionary cost
*/
Normal = 0,
/**
* Decompose compound words using penalty-based segmentation
*/
Decompose = 1,
}
/**
* Penalty configuration for decompose mode.
*
* Controls how aggressively compound words are decomposed based on
* character type and length thresholds.
*
* All four settings are plain writable numbers. The constructor is private,
* so instances cannot be created with `new` from TypeScript.
*/
export class Penalty {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_penalty_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* Penalty value for the kanji case.
* NOTE(review): presumably applied when a kanji-only word exceeds
* `kanji_penalty_length_threshold` — confirm.
*/
kanji_penalty_length_penalty: number;
/**
* Length threshold for the kanji case.
* NOTE(review): unit (characters vs. bytes) is not visible here — confirm.
*/
kanji_penalty_length_threshold: number;
/**
* Penalty value for the non-kanji ("other") case.
* NOTE(review): presumably applied when such a word exceeds
* `other_penalty_length_threshold` — confirm.
*/
other_penalty_length_penalty: number;
/**
* Length threshold for the non-kanji ("other") case.
* NOTE(review): unit (characters vs. bytes) is not visible here — confirm.
*/
other_penalty_length_threshold: number;
}
/**
* Dictionary schema definition.
*
* The constructor is private and this declaration exposes no members beyond
* disposal.
* NOTE(review): the raw module exports several `jsschema_*` functions that
* have no counterpart declared on this class — confirm whether that is intended.
*/
export class Schema {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_schema_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
}
/**
* Core segmenter for morphological analysis.
*
* The constructor is private and this declaration exposes no members beyond
* disposal; no declaration in this file returns or accepts a `Segmenter`.
*/
export class Segmenter {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_segmenter_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
}
/**
* Token object wrapping the Rust Token data.
*
* This class provides access to the token's fields and morphological details.
* Instances are returned by `Tokenizer.tokenize`; the constructor is private,
* so they cannot be created with `new` from TypeScript.
*/
export class Token {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_token_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* End byte position in the original text.
* NOTE(review): this is a byte offset, not a JavaScript string index; whether
* it is inclusive or exclusive is not visible here — confirm.
*/
byte_end: number;
/**
* Start byte position in the original text.
* NOTE(review): this is a byte offset, not a JavaScript string index.
*/
byte_start: number;
/**
* Morphological details of the token.
*/
details: string[];
/**
* Position index of the token.
*/
position: number;
/**
* Surface form of the token.
*/
surface: string;
/**
* Word ID in the dictionary.
*/
word_id: number;
}
/**
* A tokenizer for morphological analysis.
*
* Can be constructed directly from a `Dictionary`, or obtained from
* `TokenizerBuilder.build`.
*/
export class Tokenizer {
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_tokenizer_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* Creates a tokenizer.
*
* @param dictionary - Dictionary to tokenize with.
* @param mode - Optional tokenization mode given as a string; may be omitted or `null`.
* NOTE(review): the accepted strings and the default are not visible here — confirm.
* @param user_dictionary - Optional user dictionary; may be omitted or `null`.
*/
constructor(dictionary: Dictionary, mode?: string | null, user_dictionary?: UserDictionary | null);
/**
* Tokenizes the input text.
*
* @param input_text - Text to tokenize.
* @returns The tokens produced for `input_text`.
*/
tokenize(input_text: string): Token[];
}
/**
* Builder for creating a [`Tokenizer`] instance.
*
* `TokenizerBuilder` is configured through successive method calls covering
* dictionary selection, tokenization mode, character filters, and token
* filters, followed by `build()`.
*
* Every configuration method is declared to return `void`, so calls cannot be
* chained; invoke them as separate statements on the same builder.
*
* Each camelCase method has a snake_case member with an identical signature.
* NOTE(review): presumably these are aliases of one another — confirm.
*/
export class TokenizerBuilder {
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_tokenizerbuilder_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
/**
* Appends a character filter to the tokenization pipeline.
*
* @param name - Name of the character filter.
* @param args - Filter arguments; typed `any`, so the expected shape is not
* checked by the compiler.
*/
appendCharacterFilter(name: string, args: any): void;
/**
* Appends a token filter to the tokenization pipeline.
*
* @param name - Name of the token filter.
* @param args - Filter arguments; typed `any`, so the expected shape is not
* checked by the compiler.
*/
appendTokenFilter(name: string, args: any): void;
/**
* snake_case counterpart of `appendCharacterFilter` with the same signature.
*/
append_character_filter(name: string, args: any): void;
/**
* snake_case counterpart of `appendTokenFilter` with the same signature.
*/
append_token_filter(name: string, args: any): void;
/**
* Builds and returns a configured [`Tokenizer`] instance.
*/
build(): Tokenizer;
/**
* Creates a new `TokenizerBuilder` instance.
*/
constructor();
/**
* Sets the dictionary to use for tokenization.
*
* @param uri - URI identifying the dictionary.
*/
setDictionary(uri: string): void;
/**
* Sets whether to keep whitespace tokens in the output.
*
* @param keep - `true` to keep whitespace tokens.
*/
setKeepWhitespace(keep: boolean): void;
/**
* Sets the tokenization mode.
*
* @param mode - Mode given as a string.
* NOTE(review): the accepted strings are not visible here — confirm.
*/
setMode(mode: string): void;
/**
* Sets a user-defined dictionary.
*
* @param uri - URI identifying the user dictionary.
*/
setUserDictionary(uri: string): void;
/**
* snake_case counterpart of `setDictionary` with the same signature.
*/
set_dictionary(uri: string): void;
/**
* snake_case counterpart of `setKeepWhitespace` with the same signature.
*/
set_keep_whitespace(keep: boolean): void;
/**
* snake_case counterpart of `setMode` with the same signature.
*/
set_mode(mode: string): void;
/**
* snake_case counterpart of `setUserDictionary` with the same signature.
*/
set_user_dictionary(uri: string): void;
}
/**
* A user-defined dictionary for custom words.
*
* The constructor is private, so instances cannot be created with `new`.
* In this file they are returned by `loadUserDictionary` /
* `load_user_dictionary` and accepted by the `Tokenizer` constructor.
*/
export class UserDictionary {
private constructor();
/**
* Frees this object.
* NOTE(review): presumably releases the WebAssembly-side data backing the
* instance (the module exports `__wbg_userdictionary_free`) — confirm.
*/
free(): void;
/**
* Disposal hook keyed by `Symbol.dispose`, which allows the instance to be
* managed by a `using` declaration.
* NOTE(review): presumably equivalent to calling `free()` — confirm.
*/
[Symbol.dispose](): void;
}
/**
* Builds a dictionary from source files.
*
* @param input_dir - Directory containing the dictionary source files.
* @param output_dir - Directory the built dictionary is written to.
* @param metadata - Metadata configuration for the build (required).
*/
export function buildDictionary(input_dir: string, output_dir: string, metadata: Metadata): void;
/**
* Builds a user dictionary from a CSV file.
*
* @param input_file - Path of the CSV file to build from.
* @param output_dir - Directory the built user dictionary is written to.
* @param metadata - Optional metadata configuration; may be omitted or `null`.
* NOTE(review): the behaviour when omitted is not visible here — confirm.
*/
export function buildUserDictionary(input_file: string, output_dir: string, metadata?: Metadata | null): void;
/**
* snake_case counterpart of `buildDictionary` with the same signature.
* NOTE(review): presumably an alias — confirm.
*/
export function build_dictionary(input_dir: string, output_dir: string, metadata: Metadata): void;
/**
* snake_case counterpart of `buildUserDictionary` with the same signature.
* NOTE(review): presumably an alias — confirm.
*/
export function build_user_dictionary(input_file: string, output_dir: string, metadata?: Metadata | null): void;
/**
* Gets the version of the lindera-wasm library.
* Backward compatibility alias for `version()`; prefer `version()` in new code.
*
* @returns The version string.
*/
export function getVersion(): string;
/**
* Loads a dictionary from the specified URI.
*
* Declared as synchronous: the `Dictionary` is returned directly rather than
* through a `Promise`.
*
* @param uri - URI identifying the dictionary.
* NOTE(review): the supported URI schemes are not visible here — confirm.
* @returns The loaded dictionary.
*/
export function loadDictionary(uri: string): Dictionary;
/**
* Loads a user dictionary from the specified URI.
*
* Declared as synchronous: the `UserDictionary` is returned directly rather
* than through a `Promise`.
*
* @param uri - URI identifying the user dictionary.
* @param metadata - Metadata configuration (required).
* @returns The loaded user dictionary.
*/
export function loadUserDictionary(uri: string, metadata: Metadata): UserDictionary;
/**
* snake_case counterpart of `loadDictionary` with the same signature.
* NOTE(review): presumably an alias — confirm.
*/
export function load_dictionary(uri: string): Dictionary;
/**
* snake_case counterpart of `loadUserDictionary` with the same signature.
* NOTE(review): presumably an alias — confirm.
*/
export function load_user_dictionary(uri: string, metadata: Metadata): UserDictionary;
/**
* Returns the version of the lindera-wasm package.
*
* @returns The version string.
*/
export function version(): string;
/**
* Values accepted by the default-exported asynchronous initializer as the
* source of the WebAssembly module: something to fetch (`RequestInfo`, `URL`),
* an already-obtained `Response`, raw bytes (`BufferSource`), or a compiled
* `WebAssembly.Module`.
*/
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
/**
* Raw exports of the instantiated WebAssembly module, as returned by
* `initSync` and by the default-exported initializer.
*
* Every member is read-only. Apart from `memory` and the
* `__wbindgen_externrefs` table, members are functions that mostly take and
* return plain numbers or tuples of numbers (a few use `any`).
* NOTE(review): presumably these numbers are pointers, lengths and status
* flags into `memory` intended for the generated JS glue rather than for
* application code — confirm before calling any of them directly.
*/
export interface InitOutput {
readonly memory: WebAssembly.Memory;
readonly __wbg_linderaerror_free: (a: number, b: number) => void;
readonly __wbg_metadata_free: (a: number, b: number) => void;
readonly jslinderaerror_new: (a: number, b: number) => number;
readonly jslinderaerror_toString: (a: number) => [number, number];
readonly jsmetadata_compress_algorithm: (a: number) => number;
readonly jsmetadata_createDefault: () => number;
readonly jsmetadata_dictionary_schema: (a: number) => number;
readonly jsmetadata_encoding: (a: number) => [number, number];
readonly jsmetadata_name: (a: number) => [number, number];
readonly jsmetadata_new: (a: number, b: number, c: number, d: number, e: number) => number;
readonly jsmetadata_set_compress_algorithm: (a: number, b: number) => void;
readonly jsmetadata_set_dictionary_schema: (a: number, b: number) => void;
readonly jsmetadata_set_encoding: (a: number, b: number, c: number) => void;
readonly jsmetadata_set_name: (a: number, b: number, c: number) => void;
readonly jsmetadata_set_user_dictionary_schema: (a: number, b: number) => void;
readonly jsmetadata_user_dictionary_schema: (a: number) => number;
readonly __wbg_get_token_byte_end: (a: number) => number;
readonly __wbg_get_token_details: (a: number) => [number, number];
readonly __wbg_get_token_position: (a: number) => number;
readonly __wbg_get_token_word_id: (a: number) => number;
readonly __wbg_set_token_byte_end: (a: number, b: number) => void;
readonly __wbg_set_token_details: (a: number, b: number, c: number) => void;
readonly __wbg_set_token_position: (a: number, b: number) => void;
readonly __wbg_set_token_word_id: (a: number, b: number) => void;
readonly __wbg_token_free: (a: number, b: number) => void;
readonly jstoken_getDetail: (a: number, b: number) => [number, number];
readonly jstoken_toJSON: (a: number) => any;
readonly __wbg_get_penalty_kanji_penalty_length_penalty: (a: number) => number;
readonly __wbg_get_penalty_kanji_penalty_length_threshold: (a: number) => number;
readonly __wbg_get_penalty_other_penalty_length_penalty: (a: number) => number;
readonly __wbg_get_penalty_other_penalty_length_threshold: (a: number) => number;
readonly __wbg_penalty_free: (a: number, b: number) => void;
readonly __wbg_segmenter_free: (a: number, b: number) => void;
readonly __wbg_set_penalty_kanji_penalty_length_penalty: (a: number, b: number) => void;
readonly __wbg_set_penalty_kanji_penalty_length_threshold: (a: number, b: number) => void;
readonly __wbg_set_penalty_other_penalty_length_penalty: (a: number, b: number) => void;
readonly __wbg_set_penalty_other_penalty_length_threshold: (a: number, b: number) => void;
readonly build_dictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
readonly build_user_dictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
readonly getVersion: () => [number, number];
readonly jspenalty_new: (a: number, b: number, c: number, d: number) => number;
readonly load_dictionary: (a: number, b: number) => [number, number, number];
readonly load_user_dictionary: (a: number, b: number, c: number) => [number, number, number];
readonly __wbg_tokenizer_free: (a: number, b: number) => void;
readonly __wbg_tokenizerbuilder_free: (a: number, b: number) => void;
readonly tokenizer_new: (a: number, b: number, c: number, d: number) => [number, number, number];
readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number, number];
readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
readonly tokenizerbuilder_append_character_filter: (a: number, b: number, c: number, d: any) => [number, number];
readonly tokenizerbuilder_append_token_filter: (a: number, b: number, c: number, d: any) => [number, number];
readonly tokenizerbuilder_build: (a: number) => [number, number, number];
readonly tokenizerbuilder_new: () => [number, number, number];
readonly tokenizerbuilder_setDictionary: (a: number, b: number, c: number) => [number, number];
readonly tokenizerbuilder_setKeepWhitespace: (a: number, b: number) => [number, number];
readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
readonly tokenizerbuilder_set_dictionary: (a: number, b: number, c: number) => [number, number];
readonly tokenizerbuilder_set_keep_whitespace: (a: number, b: number) => [number, number];
readonly tokenizerbuilder_set_mode: (a: number, b: number, c: number) => [number, number];
readonly tokenizerbuilder_set_user_dictionary: (a: number, b: number, c: number) => [number, number];
readonly __wbg_fielddefinition_free: (a: number, b: number) => void;
readonly __wbg_get_fielddefinition_description: (a: number) => [number, number];
readonly __wbg_get_fielddefinition_field_type: (a: number) => number;
readonly __wbg_get_fielddefinition_index: (a: number) => number;
readonly __wbg_get_fielddefinition_name: (a: number) => [number, number];
readonly __wbg_schema_free: (a: number, b: number) => void;
readonly __wbg_set_fielddefinition_description: (a: number, b: number, c: number) => void;
readonly __wbg_set_fielddefinition_field_type: (a: number, b: number) => void;
readonly __wbg_set_fielddefinition_index: (a: number, b: number) => void;
readonly __wbg_set_fielddefinition_name: (a: number, b: number, c: number) => void;
readonly jsfielddefinition_new: (a: number, b: number, c: number, d: number, e: number, f: number) => number;
readonly jsschema_create_default: () => number;
readonly jsschema_field_count: (a: number) => number;
readonly jsschema_get_all_fields: (a: number) => [number, number];
readonly jsschema_get_custom_fields: (a: number) => [number, number];
readonly jsschema_get_field_by_name: (a: number, b: number, c: number) => number;
readonly jsschema_get_field_index: (a: number, b: number, c: number) => number;
readonly jsschema_get_field_name: (a: number, b: number) => [number, number];
readonly jsschema_new: (a: number, b: number) => number;
readonly __wbg_dictionary_free: (a: number, b: number) => void;
readonly __wbg_userdictionary_free: (a: number, b: number) => void;
readonly buildDictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
readonly buildUserDictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
readonly jsdictionary_encoding: (a: number) => [number, number];
readonly jsdictionary_metadata: (a: number) => number;
readonly jsdictionary_name: (a: number) => [number, number];
readonly loadDictionary: (a: number, b: number) => [number, number, number];
readonly loadUserDictionary: (a: number, b: number, c: number) => [number, number, number];
readonly version: () => [number, number];
readonly __wbg_set_token_byte_start: (a: number, b: number) => void;
readonly __wbg_set_token_surface: (a: number, b: number, c: number) => void;
readonly __wbg_set_linderaerror_message: (a: number, b: number, c: number) => void;
readonly __wbg_get_token_byte_start: (a: number) => number;
readonly __wbg_get_token_surface: (a: number) => [number, number];
readonly __wbg_get_linderaerror_message: (a: number) => [number, number];
readonly __wbindgen_malloc: (a: number, b: number) => number;
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
readonly __wbindgen_exn_store: (a: number) => void;
readonly __externref_table_alloc: () => number;
readonly __wbindgen_externrefs: WebAssembly.Table;
readonly __externref_table_dealloc: (a: number) => void;
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
readonly __externref_drop_slice: (a: number, b: number) => void;
readonly __wbindgen_start: () => void;
}
/**
* Values accepted by `initSync` as the source of the WebAssembly module:
* raw bytes (`BufferSource`) or a compiled `WebAssembly.Module`. Unlike
* `InitInput`, nothing that would need to be fetched is accepted.
*/
export type SyncInitInput = BufferSource | WebAssembly.Module;
/**
* Instantiates the given `module`, which can either be bytes or
* a precompiled `WebAssembly.Module`.
*
* The result is returned directly rather than through a `Promise`.
*
* @param {{ module: SyncInitInput }} module - Preferred form is an options
* object with a `module` property. Passing `SyncInitInput` directly is
* still accepted by the signature but is deprecated.
*
* @returns {InitOutput} The raw exports of the instantiated module.
*/
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
/**
* Asynchronous initializer and default export of this module.
*
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
* for everything else, calls `WebAssembly.instantiate` directly.
*
* @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path -
* Preferred form is an options object with a `module_or_path` property, whose
* value may itself be a `Promise`. Passing `InitInput` directly is still
* accepted by the signature but is deprecated. The parameter is optional.
* NOTE(review): the default used when it is omitted is not visible here — confirm.
*
* @returns {Promise<InitOutput>} Resolves to the raw exports of the instantiated module.
*/
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;