@thi.ng/text-analysis
Version:
Text tokenization, transformation & analysis transducers, utilities, stop words, porter stemming, vector encodings, similarities
42 lines • 1.55 kB
TypeScript
import type { Vocab } from "./api.js";
/**
* Transducer which converts each input string to lowercase.
*/
export declare const lowercase: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Transducer which collapses runs of multiple whitespace chars in each input
* string into one.
*/
export declare const collapseWS: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Transducer which removes (i.e. does not pass on) empty or whitespace-only
* strings/tokens.
*/
export declare const removeEmpty: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Transducer which removes non-alphabetic chars from each input string, using
* {@link RE_NON_ALPHA}.
*/
export declare const removeNonAlpha: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Transducer which removes non-alphanumeric chars from each input string,
* using {@link RE_NON_ALPHANUM}.
*/
export declare const removeNonAlphaNum: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Returns a transducer which removes tokens with their length outside the
* configured `[min,max]` range.
*
* NOTE(review): the `[min,max]` notation suggests both bounds are inclusive —
* confirm against the implementation.
*
* @param min - lower bound of the accepted token length
* @param max - upper bound of the accepted token length
*/
export declare const minMaxLength: (min: number, max: number) => import("@thi.ng/transducers").Transducer<string, string>;
/**
* Transducer version of {@link stemWord}, i.e. presumably applies the stemmer
* to each input token and emits the result (verify against the
* implementation).
*/
export declare const stemOnly: import("@thi.ng/transducers").Transducer<string, string>;
/**
* Returns a transducer which removes tokens which are not present in given
* `vocab`.
*
* @param vocab - vocabulary which tokens are checked against
*/
export declare const vocabOnly: (vocab: Vocab) => import("@thi.ng/transducers").Transducer<string, string>;
//# sourceMappingURL=xform.d.ts.map