@tensorflow/tfjs-core
Version:
Hardware-accelerated JavaScript library for machine intelligence
65 lines (64 loc) • 3.33 kB
TypeScript
/**
* @license
* Copyright 2021 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
/// <amd-module name="@tensorflow/tfjs-core/dist/ops/string/string_n_grams" />
import { Tensor, Tensor1D } from '../../tensor';
import { NamedTensorMap } from '../../tensor_types';
import { TensorLike } from '../../types';
/**
* Creates ngrams from ragged string data.
*
* This op accepts a ragged tensor with 1 ragged dimension containing only
* strings and outputs a ragged tensor with 1 ragged dimension containing ngrams
* of that string, joined along the innermost axis.
*
* The ragged input is passed as two pieces: `data` (the flat values) and
* `dataSplits` (the row splits). The ragged output is returned the same way,
* as `nGrams` and `nGramsSplits`.
*
* ```js
* const result = tf.string.stringNGrams(
*     ['a', 'b', 'c', 'd'], tf.tensor1d([0, 2, 4], 'int32'),
*     '|', [1, 2], 'LP', 'RP', -1, false);
* result['nGrams'].print(); // ['a', 'b', 'LP|a', 'a|b', 'b|RP',
*                           //  'c', 'd', 'LP|c', 'c|d', 'd|RP']
* result['nGramsSplits'].print(); // [0, 5, 10]
* ```
* @param data: The values tensor of the ragged string tensor to make ngrams out
*     of. Must be a 1D string tensor.
* @param dataSplits: The splits tensor of the ragged string tensor to make
*     ngrams out of.
* @param separator: The string to insert between the elements of each ngram.
*     Use "" for no separator.
* @param nGramWidths: The sizes of the ngrams to create.
* @param leftPad: The string to use to pad the left side of the ngram sequence.
*     Only used if `padWidth` !== 0.
* @param rightPad: The string to use to pad the right side of the ngram
*     sequence. Only used if `padWidth` !== 0.
* @param padWidth: The number of padding elements to add to each side of each
*     sequence. Note that padding will never be greater than `nGramWidths`-1
*     regardless of this value. If `padWidth`=-1, then add max(`nGramWidths`)-1
*     elements.
* @param preserveShortSequences: If true, then ensure that at least one ngram
*     is generated for each input sequence. In particular, if an input sequence
*     is shorter than min(`nGramWidths`) + 2*`padWidth`, then generate a single
*     ngram containing the entire sequence. If false, then no ngrams are
*     generated for these short input sequences.
* @return A map with the following properties:
*     - nGrams: The values tensor of the output ngrams ragged tensor.
*     - nGramsSplits: The splits tensor of the output ngrams ragged tensor.
*
* @doc {heading: 'Operations', subheading: 'String'}
*/
declare function stringNGrams_(data: Tensor1D | TensorLike, dataSplits: Tensor | TensorLike, separator: string, nGramWidths: number[], leftPad: string, rightPad: string, padWidth: number, preserveShortSequences: boolean): NamedTensorMap;
/**
 * Exported binding for the op. Its type is exactly that of the
 * `stringNGrams_` declaration in this file, so the parameter and return
 * documentation on `stringNGrams_` applies to it unchanged.
 */
export declare const stringNGrams: typeof stringNGrams_;
// Empty export: keeps this declaration file a module rather than a global script.
export {};