UNPKG

@tensorflow/tfjs-core

Version:

Hardware-accelerated JavaScript library for machine intelligence

65 lines (64 loc) 3.33 kB
/**
 * @license
 * Copyright 2021 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */
/// <amd-module name="@tensorflow/tfjs-core/dist/ops/string/string_n_grams" />
import { Tensor, Tensor1D } from '../../tensor';
import { NamedTensorMap } from '../../tensor_types';
import { TensorLike } from '../../types';
/**
 * Creates ngrams from ragged string data.
 *
 * This op accepts a ragged tensor with 1 ragged dimension containing only
 * strings and outputs a ragged tensor with 1 ragged dimension containing ngrams
 * of that string, joined along the innermost axis.
 *
 * ```js
 * const result = tf.string.stringNGrams(
 *   ['a', 'b', 'c', 'd'], tf.tensor1d([0, 2, 4], 'int32'),
 *   '|', [1, 2], 'LP', 'RP', -1, false);
 * result['nGrams'].print(); // ['a', 'b', 'LP|a', 'a|b', 'b|RP',
 *                           //  'c', 'd', 'LP|c', 'c|d', 'd|RP']
 * result['nGramsSplits'].print(); // [0, 5, 10]
 * ```
 * @param data: The values tensor of the ragged string tensor to make ngrams out
 *     of. Must be a 1D string tensor.
 * @param dataSplits: The splits tensor of the ragged string tensor to make
 *     ngrams out of.
 * @param separator: The string to append between elements of the token. Use ""
 *     for no separator.
 * @param nGramWidths: The sizes of the ngrams to create.
 * @param leftPad: The string to use to pad the left side of the ngram sequence.
 *     Only used if `padWidth` !== 0.
 * @param rightPad: The string to use to pad the right side of the ngram
 *     sequence. Only used if `padWidth` !== 0.
 * @param padWidth: The number of padding elements to add to each side of each
 *     sequence. Note that padding will never be greater than `nGramWidths`-1
 *     regardless of this value. If `padWidth`=-1, then add max(`nGramWidths`)-1
 *     elements.
 * @param preserveShortSequences: If true, then ensure that at least one ngram
 *     is generated for each input sequence. In particular, if an input sequence
 *     is shorter than min(`nGramWidths`) + 2*`padWidth`, then generate a single
 *     ngram containing the entire sequence. If false, then no ngrams are
 *     generated for these short input sequences.
 * @return A map with the following properties:
 *     - nGrams: The values tensor of the output ngrams ragged tensor.
 *     - nGramsSplits: The splits tensor of the output ngrams ragged tensor.
 *
 * @doc {heading: 'Operations', subheading: 'String'}
 */
declare function stringNGrams_(
    data: Tensor1D | TensorLike,
    dataSplits: Tensor | TensorLike,
    separator: string,
    nGramWidths: number[],
    leftPad: string,
    rightPad: string,
    padWidth: number,
    preserveShortSequences: boolean): NamedTensorMap;
export declare const stringNGrams: typeof stringNGrams_;
export {};