UNPKG

lindera-wasm

Version:

A morphological analysis library for WebAssembly.

410 lines (380 loc) 15.6 kB
/* tslint:disable */
/* eslint-disable */

// Type declarations for the lindera-wasm module.
//
// NOTE(review): the `__wbg_*` / `__wbindgen_*` names in `InitOutput` suggest
// this file is wasm-bindgen output — if so, regenerate it rather than editing
// it by hand.
//
// NOTE(review): several classes below (`Dictionary`, `Metadata`, `Schema`,
// `Segmenter`, `UserDictionary`) declare a private constructor and no members
// other than `free()` / `[Symbol.dispose]()`, while `InitOutput` lists
// exports such as `jsmetadata_new`, `jsmetadata_createDefault`,
// `jsschema_new`, `jsschema_create_default`, `jsdictionary_name`,
// `jstoken_getDetail`, `jstoken_toJSON` and `jspenalty_new`. The typed
// surface may therefore be incomplete — confirm against the JS glue code.

/**
 * Compression algorithm for dictionary data.
 *
 * Numeric enum; members are numbered 0 through 3.
 */
export enum CompressionAlgorithm {
  Deflate = 0,
  Zlib = 1,
  Gzip = 2,
  Raw = 3,
}

/**
 * A morphological analysis dictionary.
 *
 * The constructor is private; within this file instances are returned by
 * `loadDictionary` / `load_dictionary`.
 */
export class Dictionary {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_dictionary_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
}

/**
 * Field definition in dictionary schema.
 *
 * The constructor is private; no declaration in this file returns a
 * `FieldDefinition`.
 */
export class FieldDefinition {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_fielddefinition_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  /**
   * Optional description of the field; reads yield `string | undefined`.
   */
  get description(): string | undefined;
  /**
   * Sets the description; the setter additionally accepts `null`.
   */
  set description(value: string | null | undefined);
  /**
   * Kind of the field, as a `FieldType` member.
   */
  field_type: FieldType;
  /**
   * Numeric index of the field.
   * NOTE(review): presumably its position within the schema — confirm.
   */
  index: number;
  /**
   * Name of the field.
   */
  name: string;
}

/**
 * Field type in dictionary schema.
 */
export enum FieldType {
  /**
   * Surface form (word text)
   */
  Surface = 0,
  /**
   * Left context ID for morphological analysis
   */
  LeftContextId = 1,
  /**
   * Right context ID for morphological analysis
   */
  RightContextId = 2,
  /**
   * Word cost (used in path selection)
   */
  Cost = 3,
  /**
   * Custom field (morphological features)
   */
  Custom = 4,
}

/**
 * Error type for Lindera operations.
 *
 * The constructor is private, so instances cannot be created through these
 * typings.
 */
export class LinderaError {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_linderaerror_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  /**
   * Error message text.
   */
  message: string;
}

/**
 * Dictionary metadata configuration.
 *
 * NOTE(review): the constructor is private and no function in this file
 * returns a `Metadata`, yet `buildDictionary`, `build_dictionary`,
 * `loadUserDictionary` and `load_user_dictionary` require one. `InitOutput`
 * lists `jsmetadata_new` and `jsmetadata_createDefault`, so a factory may
 * exist at runtime without being typed here — confirm.
 */
export class Metadata {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_metadata_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
}

/**
 * Tokenization mode.
 *
 * Determines how text is segmented into tokens.
 *
 * NOTE(review): `Tokenizer`'s constructor and `TokenizerBuilder.setMode`
 * take the mode as a `string`, not as this enum; how the two relate is not
 * visible in this file.
 */
export enum Mode {
  /**
   * Standard tokenization based on dictionary cost
   */
  Normal = 0,
  /**
   * Decompose compound words using penalty-based segmentation
   */
  Decompose = 1,
}

/**
 * Penalty configuration for decompose mode.
 *
 * Controls how aggressively compound words are decomposed based on
 * character type and length thresholds.
 *
 * The constructor is private; `InitOutput` lists a `jspenalty_new` export
 * that has no typed counterpart in this file.
 */
export class Penalty {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_penalty_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  // The four numeric settings below form two threshold/penalty pairs, one
  // prefixed `kanji_` and one prefixed `other_`. Units and valid ranges are
  // not visible in this file.
  kanji_penalty_length_penalty: number;
  kanji_penalty_length_threshold: number;
  other_penalty_length_penalty: number;
  other_penalty_length_threshold: number;
}

/**
 * Dictionary schema definition.
 *
 * The constructor is private and no members are typed here, although
 * `InitOutput` lists several `jsschema_*` exports.
 */
export class Schema {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_schema_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
}

/**
 * Core segmenter for morphological analysis.
 *
 * The constructor is private and nothing in this file returns a `Segmenter`.
 */
export class Segmenter {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_segmenter_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
}

/**
 * Token object wrapping the Rust Token data.
 *
 * This class provides robust access to token fields and details.
 *
 * The constructor is private; within this file instances are returned by
 * `Tokenizer.tokenize`.
 */
export class Token {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_token_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  /**
   * End byte position in the original text.
   */
  byte_end: number;
  /**
   * Start byte position in the original text.
   */
  byte_start: number;
  /**
   * Morphological details of the token.
   */
  details: string[];
  /**
   * Position index of the token.
   */
  position: number;
  /**
   * Surface form of the token.
   */
  surface: string;
  /**
   * Word ID in the dictionary.
   */
  word_id: number;
}

/**
 * A tokenizer for morphological analysis.
 *
 * Unlike most classes in this file, the constructor is public. Instances are
 * also returned by `TokenizerBuilder.build`.
 */
export class Tokenizer {
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_tokenizer_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  /**
   * Creates a tokenizer over `dictionary`.
   *
   * `mode` and `user_dictionary` are optional and may also be `null`.
   * NOTE(review): `mode` is a free-form string; the accepted values and the
   * default used when it is omitted are not visible in this file.
   */
  constructor(dictionary: Dictionary, mode?: string | null, user_dictionary?: UserDictionary | null);
  /**
   * Tokenizes the input text.
   */
  tokenize(input_text: string): Token[];
}

/**
 * Builder for creating a [`Tokenizer`] instance.
 *
 * `TokenizerBuilder` provides a fluent API for configuring and building a tokenizer
 * with various options such as dictionary selection, tokenization mode, character filters,
 * and token filters.
 *
 * Every configuration method is declared twice, once in camelCase and once
 * in snake_case, with identical signatures. All of them return `void`, so
 * calls cannot be chained through these typings.
 */
export class TokenizerBuilder {
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_tokenizerbuilder_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
  /**
   * Appends a character filter to the tokenization pipeline.
   */
  appendCharacterFilter(name: string, args: any): void;
  /**
   * Appends a token filter to the tokenization pipeline.
   */
  appendTokenFilter(name: string, args: any): void;
  // snake_case counterparts of the two methods above (same signatures).
  // NOTE(review): presumably plain aliases — confirm.
  append_character_filter(name: string, args: any): void;
  append_token_filter(name: string, args: any): void;
  /**
   * Builds and returns a configured [`Tokenizer`] instance.
   */
  build(): Tokenizer;
  /**
   * Creates a new `TokenizerBuilder` instance.
   */
  constructor();
  /**
   * Sets the dictionary to use for tokenization.
   */
  setDictionary(uri: string): void;
  /**
   * Sets whether to keep whitespace tokens in the output.
   */
  setKeepWhitespace(keep: boolean): void;
  /**
   * Sets the tokenization mode.
   */
  setMode(mode: string): void;
  /**
   * Sets a user-defined dictionary.
   */
  setUserDictionary(uri: string): void;
  // snake_case counterparts of the four setters above (same signatures).
  // NOTE(review): presumably plain aliases — confirm.
  set_dictionary(uri: string): void;
  set_keep_whitespace(keep: boolean): void;
  set_mode(mode: string): void;
  set_user_dictionary(uri: string): void;
}

/**
 * A user-defined dictionary for custom words.
 *
 * The constructor is private; within this file instances are returned by
 * `loadUserDictionary` / `load_user_dictionary`.
 */
export class UserDictionary {
  private constructor();
  /**
   * Releases this object.
   * NOTE(review): presumably frees the wasm-side instance
   * (cf. `__wbg_userdictionary_free` in `InitOutput`) — confirm.
   */
  free(): void;
  /**
   * Disposal hook keyed by `Symbol.dispose`.
   * NOTE(review): presumably equivalent to `free()` — confirm.
   */
  [Symbol.dispose](): void;
}

/**
 * Builds a dictionary from source files.
 */
export function buildDictionary(input_dir: string, output_dir: string, metadata: Metadata): void;

/**
 * Builds a user dictionary from a CSV file.
 */
export function buildUserDictionary(input_file: string, output_dir: string, metadata?: Metadata | null): void;

/**
 * snake_case counterpart of `buildDictionary` (same signature).
 * NOTE(review): presumably a plain alias — confirm.
 */
export function build_dictionary(input_dir: string, output_dir: string, metadata: Metadata): void;

/**
 * snake_case counterpart of `buildUserDictionary` (same signature).
 * NOTE(review): presumably a plain alias — confirm.
 */
export function build_user_dictionary(input_file: string, output_dir: string, metadata?: Metadata | null): void;

/**
 * Gets the version of the lindera-wasm library.
 * Backward compatibility alias for version().
 */
export function getVersion(): string;

/**
 * Loads a dictionary from the specified URI.
 */
export function loadDictionary(uri: string): Dictionary;

/**
 * Loads a user dictionary from the specified URI.
 */
export function loadUserDictionary(uri: string, metadata: Metadata): UserDictionary;

/**
 * snake_case counterpart of `loadDictionary` (same signature).
 * NOTE(review): presumably a plain alias — confirm.
 */
export function load_dictionary(uri: string): Dictionary;

/**
 * snake_case counterpart of `loadUserDictionary` (same signature).
 * NOTE(review): presumably a plain alias — confirm.
 */
export function load_user_dictionary(uri: string, metadata: Metadata): UserDictionary;

/**
 * Returns the version of the lindera-wasm package.
 */
export function version(): string;

/**
 * Inputs accepted by the default (asynchronous) initializer: something to
 * fetch (`RequestInfo`, `URL`), a `Response`, raw bytes (`BufferSource`), or
 * a precompiled `WebAssembly.Module`.
 */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;

/**
 * Shape of the value returned by `initSync` and resolved by the default
 * initializer: the module's `WebAssembly.Memory` plus its low-level exports.
 *
 * Every function here takes and returns plain numbers, number tuples, or
 * `any`.
 * NOTE(review): presumably these are pointer/length/handle values and the
 * typed classes and functions above are the intended API — confirm.
 */
export interface InitOutput {
  readonly memory: WebAssembly.Memory;

  // LinderaError and Metadata exports.
  readonly __wbg_linderaerror_free: (a: number, b: number) => void;
  readonly __wbg_metadata_free: (a: number, b: number) => void;
  readonly jslinderaerror_new: (a: number, b: number) => number;
  readonly jslinderaerror_toString: (a: number) => [number, number];
  readonly jsmetadata_compress_algorithm: (a: number) => number;
  readonly jsmetadata_createDefault: () => number;
  readonly jsmetadata_dictionary_schema: (a: number) => number;
  readonly jsmetadata_encoding: (a: number) => [number, number];
  readonly jsmetadata_name: (a: number) => [number, number];
  readonly jsmetadata_new: (a: number, b: number, c: number, d: number, e: number) => number;
  readonly jsmetadata_set_compress_algorithm: (a: number, b: number) => void;
  readonly jsmetadata_set_dictionary_schema: (a: number, b: number) => void;
  readonly jsmetadata_set_encoding: (a: number, b: number, c: number) => void;
  readonly jsmetadata_set_name: (a: number, b: number, c: number) => void;
  readonly jsmetadata_set_user_dictionary_schema: (a: number, b: number) => void;
  readonly jsmetadata_user_dictionary_schema: (a: number) => number;

  // Token exports (the byte_start and surface accessors appear further down).
  readonly __wbg_get_token_byte_end: (a: number) => number;
  readonly __wbg_get_token_details: (a: number) => [number, number];
  readonly __wbg_get_token_position: (a: number) => number;
  readonly __wbg_get_token_word_id: (a: number) => number;
  readonly __wbg_set_token_byte_end: (a: number, b: number) => void;
  readonly __wbg_set_token_details: (a: number, b: number, c: number) => void;
  readonly __wbg_set_token_position: (a: number, b: number) => void;
  readonly __wbg_set_token_word_id: (a: number, b: number) => void;
  readonly __wbg_token_free: (a: number, b: number) => void;
  readonly jstoken_getDetail: (a: number, b: number) => [number, number];
  readonly jstoken_toJSON: (a: number) => any;

  // Penalty and Segmenter exports, the snake_case build/load functions, and
  // getVersion.
  readonly __wbg_get_penalty_kanji_penalty_length_penalty: (a: number) => number;
  readonly __wbg_get_penalty_kanji_penalty_length_threshold: (a: number) => number;
  readonly __wbg_get_penalty_other_penalty_length_penalty: (a: number) => number;
  readonly __wbg_get_penalty_other_penalty_length_threshold: (a: number) => number;
  readonly __wbg_penalty_free: (a: number, b: number) => void;
  readonly __wbg_segmenter_free: (a: number, b: number) => void;
  readonly __wbg_set_penalty_kanji_penalty_length_penalty: (a: number, b: number) => void;
  readonly __wbg_set_penalty_kanji_penalty_length_threshold: (a: number, b: number) => void;
  readonly __wbg_set_penalty_other_penalty_length_penalty: (a: number, b: number) => void;
  readonly __wbg_set_penalty_other_penalty_length_threshold: (a: number, b: number) => void;
  readonly build_dictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
  readonly build_user_dictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
  readonly getVersion: () => [number, number];
  readonly jspenalty_new: (a: number, b: number, c: number, d: number) => number;
  readonly load_dictionary: (a: number, b: number) => [number, number, number];
  readonly load_user_dictionary: (a: number, b: number, c: number) => [number, number, number];

  // Tokenizer and TokenizerBuilder exports (camelCase and snake_case forms).
  readonly __wbg_tokenizer_free: (a: number, b: number) => void;
  readonly __wbg_tokenizerbuilder_free: (a: number, b: number) => void;
  readonly tokenizer_new: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number, number];
  readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
  readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
  readonly tokenizerbuilder_append_character_filter: (a: number, b: number, c: number, d: any) => [number, number];
  readonly tokenizerbuilder_append_token_filter: (a: number, b: number, c: number, d: any) => [number, number];
  readonly tokenizerbuilder_build: (a: number) => [number, number, number];
  readonly tokenizerbuilder_new: () => [number, number, number];
  readonly tokenizerbuilder_setDictionary: (a: number, b: number, c: number) => [number, number];
  readonly tokenizerbuilder_setKeepWhitespace: (a: number, b: number) => [number, number];
  readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
  readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
  readonly tokenizerbuilder_set_dictionary: (a: number, b: number, c: number) => [number, number];
  readonly tokenizerbuilder_set_keep_whitespace: (a: number, b: number) => [number, number];
  readonly tokenizerbuilder_set_mode: (a: number, b: number, c: number) => [number, number];
  readonly tokenizerbuilder_set_user_dictionary: (a: number, b: number, c: number) => [number, number];

  // FieldDefinition and Schema exports.
  readonly __wbg_fielddefinition_free: (a: number, b: number) => void;
  readonly __wbg_get_fielddefinition_description: (a: number) => [number, number];
  readonly __wbg_get_fielddefinition_field_type: (a: number) => number;
  readonly __wbg_get_fielddefinition_index: (a: number) => number;
  readonly __wbg_get_fielddefinition_name: (a: number) => [number, number];
  readonly __wbg_schema_free: (a: number, b: number) => void;
  readonly __wbg_set_fielddefinition_description: (a: number, b: number, c: number) => void;
  readonly __wbg_set_fielddefinition_field_type: (a: number, b: number) => void;
  readonly __wbg_set_fielddefinition_index: (a: number, b: number) => void;
  readonly __wbg_set_fielddefinition_name: (a: number, b: number, c: number) => void;
  readonly jsfielddefinition_new: (a: number, b: number, c: number, d: number, e: number, f: number) => number;
  readonly jsschema_create_default: () => number;
  readonly jsschema_field_count: (a: number) => number;
  readonly jsschema_get_all_fields: (a: number) => [number, number];
  readonly jsschema_get_custom_fields: (a: number) => [number, number];
  readonly jsschema_get_field_by_name: (a: number, b: number, c: number) => number;
  readonly jsschema_get_field_index: (a: number, b: number, c: number) => number;
  readonly jsschema_get_field_name: (a: number, b: number) => [number, number];
  readonly jsschema_new: (a: number, b: number) => number;

  // Dictionary and UserDictionary exports, the camelCase build/load
  // functions, and version.
  readonly __wbg_dictionary_free: (a: number, b: number) => void;
  readonly __wbg_userdictionary_free: (a: number, b: number) => void;
  readonly buildDictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
  readonly buildUserDictionary: (a: number, b: number, c: number, d: number, e: number) => [number, number];
  readonly jsdictionary_encoding: (a: number) => [number, number];
  readonly jsdictionary_metadata: (a: number) => number;
  readonly jsdictionary_name: (a: number) => [number, number];
  readonly loadDictionary: (a: number, b: number) => [number, number, number];
  readonly loadUserDictionary: (a: number, b: number, c: number) => [number, number, number];
  readonly version: () => [number, number];

  // Remaining Token (byte_start, surface) and LinderaError (message)
  // field accessors.
  readonly __wbg_set_token_byte_start: (a: number, b: number) => void;
  readonly __wbg_set_token_surface: (a: number, b: number, c: number) => void;
  readonly __wbg_set_linderaerror_message: (a: number, b: number, c: number) => void;
  readonly __wbg_get_token_byte_start: (a: number) => number;
  readonly __wbg_get_token_surface: (a: number) => [number, number];
  readonly __wbg_get_linderaerror_message: (a: number) => [number, number];

  // Exports prefixed `__wbindgen_` / `__externref_`.
  // NOTE(review): presumably runtime support helpers (allocation, exception
  // storage, externref table, start hook) — confirm.
  readonly __wbindgen_malloc: (a: number, b: number) => number;
  readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
  readonly __wbindgen_exn_store: (a: number) => void;
  readonly __externref_table_alloc: () => number;
  readonly __wbindgen_externrefs: WebAssembly.Table;
  readonly __externref_table_dealloc: (a: number) => void;
  readonly __wbindgen_free: (a: number, b: number, c: number) => void;
  readonly __externref_drop_slice: (a: number, b: number) => void;
  readonly __wbindgen_start: () => void;
}

/**
 * Inputs accepted by `initSync`: raw bytes (`BufferSource`) or a precompiled
 * `WebAssembly.Module`.
 */
export type SyncInitInput = BufferSource | WebAssembly.Module;

/**
 * Instantiates the given `module`, which can either be bytes or
 * a precompiled `WebAssembly.Module`.
 *
 * The preferred argument form is an options object `{ module }`; the bare
 * `SyncInitInput` form is still accepted by the signature.
 *
 * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
 *
 * @returns {InitOutput}
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;

/**
 * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
 * for everything else, calls `WebAssembly.instantiate` directly.
 *
 * The argument is optional. The preferred form is an options object
 * `{ module_or_path }`, whose value may itself be a promise; the bare
 * `InitInput` / `Promise<InitInput>` forms are still accepted by the
 * signature.
 *
 * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
 *
 * @returns {Promise<InitOutput>}
 */
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;