@tensorflow/tfjs-data
Version:
TensorFlow Data API in JavaScript
99 lines (98 loc) • 4.51 kB
TypeScript
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
/// <amd-module name="@tensorflow/tfjs-data/dist/datasets/csv_dataset" />
import { TensorContainer } from '@tensorflow/tfjs-core';
import { Dataset } from '../dataset';
import { DataSource } from '../datasource';
import { LazyIterator } from '../iterators/lazy_iterator';
import { CSVConfig } from '../types';
import { TextLineDataset } from './text_line_dataset';
/**
* Represents a potentially large collection of delimited text records.
*
* The produced `TensorContainer`s each contain one key-value pair for
* every column of the table. When a field is empty in the incoming data, the
* resulting value is `undefined`, or throw error if it is required. Values
* that can be parsed as numbers are emitted as type `number`, other values
* are parsed as `string`.
*
* The results are not batched.
*
* @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'}
*/
export declare class CSVDataset extends Dataset<TensorContainer> {
protected readonly input: DataSource;
base: TextLineDataset;
private hasHeader;
private fullColumnNames;
private columnNamesValidated;
private columnConfigs;
private configuredColumnsOnly;
private delimiter;
private delimWhitespace;
/**
* Returns column names of the csv dataset. If `configuredColumnsOnly` is
* true, return column names in `columnConfigs`. If `configuredColumnsOnly` is
* false and `columnNames` is provided, `columnNames`. If
* `configuredColumnsOnly` is false and `columnNames` is not provided, return
* all column names parsed from the csv file. For example usage please go to
* `tf.data.csv`.
*
* @doc {heading: 'Data', subheading: 'Classes'}
*/
columnNames(): Promise<string[]>;
private setColumnNames;
private maybeReadHeaderLine;
/**
* Create a `CSVDataset`.
*
* @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
* @param csvConfig (Optional) A CSVConfig object that contains configurations
* of reading and decoding from CSV file(s).
*
* hasHeader: (Optional) A boolean value that indicates whether the first
* row of provided CSV file is a header line with column names, and should
* not be included in the data. Defaults to `true`.
*
* columnNames: (Optional) A list of strings that corresponds to
* the CSV column names, in order. If provided, it ignores the column
* names inferred from the header row. If not provided, infers the column
* names from the first row of the records. If hasHeader is false and
* columnNames is not provided, this method throws an error.
*
* columnConfigs: (Optional) A dictionary whose key is column names, value
* is an object stating if this column is required, column's data type,
* default value, and if this column is label. If provided, keys must
* correspond to names provided in columnNames or inferred from the file
* header lines. If isLabel is true any column, returns an array of two
* items: the first item is a dict of features key/value pairs, the second
* item is a dict of labels key/value pairs. If no feature is marked as
* label, returns a dict of features only.
*
* configuredColumnsOnly (Optional) If true, only columns provided in
* columnConfigs will be parsed and provided during iteration.
*
* delimiter (Optional) The string used to parse each line of the input
* file. Defaults to `,`.
*/
constructor(input: DataSource, csvConfig?: CSVConfig);
iterator(): Promise<LazyIterator<TensorContainer>>;
makeDataElement(line: string): TensorContainer;
private getBoolean;
private parseRow;
}