UNPKG

@tensorflow/tfjs-data

Version:

TensorFlow Data API in JavaScript

99 lines (98 loc) 4.51 kB
/**
 * @license
 * Copyright 2018 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * =============================================================================
 */
/// <amd-module name="@tensorflow/tfjs-data/dist/datasets/csv_dataset" />
import { TensorContainer } from '@tensorflow/tfjs-core';
import { Dataset } from '../dataset';
import { DataSource } from '../datasource';
import { LazyIterator } from '../iterators/lazy_iterator';
import { CSVConfig } from '../types';
import { TextLineDataset } from './text_line_dataset';
/**
 * Represents a potentially large collection of delimited text records.
 *
 * The produced `TensorContainer`s each contain one key-value pair for
 * every column of the table. When a field is empty in the incoming data, the
 * resulting value is `undefined`, or an error is thrown if the field is
 * required. Values that can be parsed as numbers are emitted as type
 * `number`; other values are emitted as `string`.
 *
 * The results are not batched.
 *
 * @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'}
 */
export declare class CSVDataset extends Dataset<TensorContainer> {
    /**
     * The `DataSource` this dataset reads from (presumably the `input`
     * argument passed to the constructor — confirm in the implementation).
     */
    protected readonly input: DataSource;
    /**
     * A `TextLineDataset` associated with this dataset.
     * NOTE(review): presumably the line-by-line view of `input` from which
     * rows are parsed — confirm in the implementation.
     */
    base: TextLineDataset;
    // Private state. The types are erased in this declaration file. Most names
    // mirror the `csvConfig` options documented on the constructor and are
    // presumably populated from it — confirm in the implementation.
    private hasHeader;
    private fullColumnNames;
    private columnNamesValidated;
    private columnConfigs;
    private configuredColumnsOnly;
    private delimiter;
    // NOTE(review): `delimWhitespace` has no entry in the constructor
    // documentation in this file; see `CSVConfig` for its meaning — TODO confirm.
    private delimWhitespace;
    /**
     * Returns column names of the csv dataset.
     *
     * - If `configuredColumnsOnly` is true, returns the column names in
     *   `columnConfigs`.
     * - If `configuredColumnsOnly` is false and `columnNames` is provided,
     *   returns `columnNames`.
     * - If `configuredColumnsOnly` is false and `columnNames` is not provided,
     *   returns all column names parsed from the csv file.
     *
     * For example usage please go to `tf.data.csv`.
     *
     * @returns A promise that resolves to the list of column names.
     *
     * @doc {heading: 'Data', subheading: 'Classes'}
     */
    columnNames(): Promise<string[]>;
    // Private helpers; their signatures are erased in this declaration file.
    private setColumnNames;
    private maybeReadHeaderLine;
    /**
     * Create a `CSVDataset`.
     *
     * @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
     * @param csvConfig (Optional) A CSVConfig object that contains configurations
     *     of reading and decoding from CSV file(s).
     *
     *     hasHeader: (Optional) A boolean value that indicates whether the first
     *     row of provided CSV file is a header line with column names, and should
     *     not be included in the data. Defaults to `true`.
     *
     *     columnNames: (Optional) A list of strings that corresponds to
     *     the CSV column names, in order. If provided, it ignores the column
     *     names inferred from the header row. If not provided, infers the column
     *     names from the first row of the records. If hasHeader is false and
     *     columnNames is not provided, this method throws an error.
     *
     *     columnConfigs: (Optional) A dictionary whose key is column names, value
     *     is an object stating if this column is required, column's data type,
     *     default value, and if this column is label. If provided, keys must
     *     correspond to names provided in columnNames or inferred from the file
     *     header lines. If isLabel is true for any column, returns an array of
     *     two items: the first item is a dict of features key/value pairs, the
     *     second item is a dict of labels key/value pairs. If no feature is
     *     marked as label, returns a dict of features only.
     *
     *     configuredColumnsOnly: (Optional) If true, only columns provided in
     *     columnConfigs will be parsed and provided during iteration.
     *
     *     delimiter: (Optional) The string used to parse each line of the input
     *     file. Defaults to `,`.
     */
    constructor(input: DataSource, csvConfig?: CSVConfig);
    /**
     * Returns a promise for a `LazyIterator` that yields this dataset's
     * `TensorContainer` elements.
     */
    iterator(): Promise<LazyIterator<TensorContainer>>;
    /**
     * Builds a `TensorContainer` from a single line of text.
     *
     * @param line One line of input (presumably a single CSV record — confirm
     *     in the implementation).
     * @returns The `TensorContainer` produced for that line.
     */
    makeDataElement(line: string): TensorContainer;
    // Private helpers; their signatures are erased in this declaration file.
    private getBoolean;
    private parseRow;
}