cld3-asm

Version:

WebAssembly-based JavaScript bindings for Google's Compact Language Detector 3 (CLD3)

95 lines (94 loc) • 3.45 kB

TypeScript

/**
 * Value reported in {@link LanguageResult.language} when detection fails.
 */
export declare const UnknownLanguage: string;

/**
 * Outcome of a single language prediction.
 */
export interface LanguageResult {
  /**
   * The language that was detected, or {@link UnknownLanguage} when
   * detection fails.
   */
  language: string;

  /**
   * Probability assigned to the detected language.
   */
  probability: number;

  /**
   * True when the prediction is considered reliable.
   */
  is_reliable: boolean;

  /**
   * Share of the input bytes attributed to this language. Results produced
   * by FindLanguage always carry the value 1 here.
   */
  proportion: number;
}

/**
 * @internal
 *
 * Vector of {@link LanguageResult} entries, as returned by
 * `FindTopNMostFreqLangs`.
 */
export interface ResultVector {
  /** Number of entries held by the vector. */
  size(): number;

  /** Returns the entry stored at `index`. */
  get(index: number): LanguageResult;
}

/**
 * @internal
 *
 * A language identifier instance. Call {@link NNetLanguageIdentifier.delete}
 * to destroy the instance.
 */
export interface NNetLanguageIdentifier {
  /**
   * Predicts the single most likely language of `text` and reports it with
   * additional information such as its probability.
   *
   * Only the first N bytes are examined, where N is the minimum of the
   * number of interchange-valid UTF8 bytes and max_num_bytes_. When N is
   * shorter than min_num_bytes_, kUnknown is returned.
   */
  FindLanguage(text: string): LanguageResult;

  /**
   * Predicts the `numLangs` most frequent languages of `text`.
   *
   * The text (up to the first byte, if any, that is not interchange-valid
   * UTF8) is split into spans by script and a language is predicted for
   * every span. The returned vector stores the top languages together with
   * additional information such as proportions.
   *
   * - For each span, the number of bytes considered is the minimum of the
   *   span size and max_num_bytes_.
   * - Spans shorter than min_num_bytes_ are skipped.
   * - When more languages are requested than the input contains, kUnknown
   *   is returned for the surplus entries.
   * - For overly long input, only the first kMaxNumInputBytesToConsider
   *   bytes are processed.
   */
  FindTopNMostFreqLangs(text: string, numLangs: number): ResultVector;

  /**
   * Destroys this identifier instance.
   */
  delete(): void;
}

/**
 * @internal
 *
 * Shape of the module that emscripten generates to load the wasm binary.
 * https://kripken.github.io/emscripten-site/docs/api_reference/preamble.js.html
 */
export interface CldAsmModule {
  NNetLanguageIdentifier: {
    /**
     * Minimum number of bytes required to make a prediction when the
     * default constructor is used.
     */
    kMinNumBytesToConsider: number;

    /**
     * Maximum number of bytes taken into account for a prediction when the
     * default constructor is used.
     */
    kMaxNumBytesToConsider: number;

    /**
     * Upper bound on the number of input bytes that get processed.
     */
    kMaxNumInputBytesToConsider: number;

    /**
     * A prediction is marked reliable when its probability is greater than
     * or equal to this threshold. The value was optimized on a set of text
     * segments extracted from wikipedia and yields an overall precision,
     * recall, and f1 of 0.9760, 0.9624, and 0.9692, respectively.
     */
    kReliabilityThreshold: number;

    /**
     * Reliability threshold applied to the languages hr and bs.
     */
    kReliabilityHrBsThreshold: number;

    /**
     * Creates an identifier instance.
     *
     * NOTE(review): `minBytes` / `maxBytes` presumably correspond to the
     * min_num_bytes_ / max_num_bytes_ limits referred to by the identifier's
     * methods — confirm against the native binding.
     */
    new (minBytes: number, maxBytes: number): NNetLanguageIdentifier;
  };

  /**
   * NOTE(review): presumably resolves once the emscripten runtime has been
   * initialized; the meaning of the resolved boolean is not visible from
   * this declaration — confirm against the loader implementation.
   */
  initializeRuntime(): Promise<boolean>;
}