UNPKG

@stdlib/nlp

Version:

Natural language processing.

202 lines (187 loc) 5.44 kB
/* * @license Apache-2.0 * * Copyright (c) 2021 The Stdlib Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // TypeScript Version: 4.1 /* eslint-disable max-lines */ import expandAcronyms = require( './../../expand-acronyms' ); import expandContractions = require( './../../expand-contractions' ); import lda = require( './../../lda' ); import ordinalize = require( './../../ordinalize' ); import porterStemmer = require( './../../porter-stemmer' ); import sentencize = require( './../../sentencize' ); import tokenize = require( './../../tokenize' ); /** * Interface describing the `nlp` namespace. */ interface Namespace { /** * Expands acronyms in a string. * * @param str - string to convert * @returns string with expanded acronyms * * @example * var str = 'LOL, this is fun. I am ROFL.'; * var out = ns.expandAcronyms( str ); * // returns 'laughing out loud, this is fun. I am rolling on the floor laughing.' * * @example * var str = 'brb, I need to check my mail. thx!'; * var out = ns.expandAcronyms( str ); * // returns 'be right back, I need to check my mail. thanks!' */ expandAcronyms: typeof expandAcronyms; /** * Expands all contractions to their formal equivalents. * * @param str - string to convert * @returns string with expanded contractions * * @example * var str = 'I won\'t be able to get y\'all out of this one.'; * var out = ns.expandContractions( str ); * // returns 'I will not be able to get you all out of this one.' * * @example * var str = 'It oughtn\'t to be my fault, because, you know, I didn\'t know'; * var out = ns.expandContractions( str ); * // returns 'It ought not to be my fault, because, you know, I did not know' */ expandContractions: typeof expandContractions; /** * Latent Dirichlet Allocation via collapsed Gibbs sampling. * * @param documents - document corpus * @param K - number of topics * @param options - options object * @param options.alpha - Dirichlet hyper-parameter of topic vector theta: * @param options.beta - Dirichlet hyper-parameter for word vector phi * @throws second argument must be a positive integer * @throws must provide valid options * @returns model object */ lda: typeof lda; /** * Converts an integer to an ordinal string (e.g., `1st`, `2nd`, etc.). * * @param value - string or number to convert * @param options - options * @param options.suffixOnly - boolean indicating whether to return only the suffix (default: false) * @param options.lang - language code (default: 'en') * @param options.gender - grammatical gender (used if applicable; either 'masculine' or 'feminine'; default: 'masculine') * @returns ordinal string or suffix * * @example * var out = ns.ordinalize( '1' ); * // returns '1st' * * @example * var out = ns.ordinalize( '2' ); * // returns '2nd' * * @example * var out = ns.ordinalize( '21' ); * // returns '21st' * * @example * var out = ns.ordinalize( '1', { 'lang': 'de' } ); * // returns '1.' * * @example * var out = ns.ordinalize( '7', { 'lang': 'es' } ); * // returns '7ª' */ ordinalize: typeof ordinalize; /** * Extracts the stem of a given word. * * @param word - input word * @returns word stem * * @example * var out = ns.porterStemmer( 'walking' ); * // returns 'walk' * * @example * var out = ns.porterStemmer( 'walked' ); * // returns 'walk' * * @example * var out = ns.porterStemmer( 'walks' ); * // returns 'walk' * * @example * var out = ns.porterStemmer( 'worldwide' ); * // returns 'worldwid' * * @example * var out = ns.porterStemmer( '' ); * // returns '' */ porterStemmer: typeof porterStemmer; /** * Splits a string into an array of sentences. * * @param str - input string * @returns array of sentences * * @example * var str = 'Hello World! How are you?'; * var out = ns.sentencize( str ); * // returns [ 'Hello World!', 'How are you?' ] * * @example * var str = ''; * var out = ns.sentencize( str ); * // returns [] * * @example * var str = 'Hello Mrs. Maple, could you call me back?'; * var out = ns.sentencize( str ); * // returns [ 'Hello Mrs. Maple, could you call me back?' ] */ sentencize: typeof sentencize; /** * Tokenize a string. * * @param str - input string * @param keepWhitespace - boolean indicating whether whitespace characters should be returned as part of the token array (default: false) * @returns array of tokens * * @example * var str = 'Hello World!'; * var out = ns.tokenize( str ); * // returns [ 'Hello', 'World', '!' ] * * @example * var str = ''; * var out = ns.tokenize( str ); * // returns [] * * @example * var str = 'Hello Mrs. Maple, could you call me back?'; * var out = ns.tokenize( str ); * // returns [ 'Hello', 'Mrs.', 'Maple', ',', 'could', 'you', 'call', 'me', 'back', '?' ] */ tokenize: typeof tokenize; } /** * Natural language processing. */ declare var ns: Namespace; // EXPORTS // export = ns;