@aws-cdk/aws-glue-alpha
Version:
The CDK Construct Library for AWS::Glue
254 lines (253 loc) • 9.45 kB
TypeScript
/**
 * Wraps the absolute class name of a Hadoop `InputFormat`, i.e. the class used
 * when reading the files that back a table.
 */
export declare class InputFormat {
    /**
     * Absolute class name of the Hadoop `InputFormat`.
     */
    readonly className: string;

    /**
     * @param className absolute class name of the Hadoop `InputFormat`.
     */
    constructor(className: string);

    /**
     * Reads Avro files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerInputFormat.html
     */
    static readonly AVRO: InputFormat;

    /**
     * Reads Cloudtrail Logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL: InputFormat;

    /**
     * Reads Orc files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.html
     */
    static readonly ORC: InputFormat;

    /**
     * Reads Parquet files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.html
     */
    static readonly PARQUET: InputFormat;

    /**
     * Reads plain text files, splitting them into lines. A linefeed or a
     * carriage-return marks the end of a line. Each key is the position in the
     * file and each value is the text of the line.
     * JSON and CSV files are examples of data read with this InputFormat.
     *
     * @see https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/TextInputFormat.html
     */
    static readonly TEXT: InputFormat;
}
/**
 * Wraps the absolute class name of a Hadoop `OutputFormat`, i.e. the class used
 * when writing the files that back a table.
 */
export declare class OutputFormat {
    /**
     * Absolute class name of the Hadoop `OutputFormat`.
     */
    readonly className: string;
    /**
     * Writes text data with a null key (value only).
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.html
     */
    static readonly HIVE_IGNORE_KEY_TEXT: OutputFormat;
    /**
     * OutputFormat for Avro files.
     *
     * NOTE(review): this member was previously declared as `InputFormat`; it is
     * typed as `OutputFormat` here to agree with its siblings. Confirm that the
     * implementation also constructs an `OutputFormat` instance.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.html
     */
    static readonly AVRO: OutputFormat;
    /**
     * OutputFormat for Orc files.
     *
     * NOTE(review): this member was previously declared as `InputFormat`; it is
     * typed as `OutputFormat` here to agree with its siblings. Confirm that the
     * implementation also constructs an `OutputFormat` instance.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.html
     */
    static readonly ORC: OutputFormat;
    /**
     * OutputFormat for Parquet files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.html
     */
    static readonly PARQUET: OutputFormat;
    /**
     * @param className absolute class name of the Hadoop `OutputFormat`.
     */
    constructor(className: string);
}
/**
 * The serialization library (SerDe) used to serialize and deserialize the
 * records of a table.
 *
 * @see https://cwiki.apache.org/confluence/display/Hive/SerDe
 */
export declare class SerializationLibrary {
    /**
     * Class name carried by this serialization library.
     */
    readonly className: string;

    /**
     * @param className class name of the serialization library.
     */
    constructor(className: string);

    /**
     * Avro serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/avro/AvroSerDe.html
     */
    static readonly AVRO: SerializationLibrary;

    /**
     * CloudTrail serialization library.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL: SerializationLibrary;

    /**
     * Grok serialization library.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    static readonly GROK: SerializationLibrary;

    /**
     * Hive JSON serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hive/hcatalog/data/JsonSerDe.html
     */
    static readonly HIVE_JSON: SerializationLibrary;

    /**
     * Lazy simple serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.html
     */
    static readonly LAZY_SIMPLE: SerializationLibrary;

    /**
     * OpenCSV serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/OpenCSVSerde.html
     */
    static readonly OPEN_CSV: SerializationLibrary;

    /**
     * OpenX JSON serialization library.
     *
     * @see https://github.com/rcongiu/Hive-JSON-Serde
     */
    static readonly OPENX_JSON: SerializationLibrary;

    /**
     * ORC serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcSerde.html
     */
    static readonly ORC: SerializationLibrary;

    /**
     * Parquet serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.html
     */
    static readonly PARQUET: SerializationLibrary;

    /**
     * Regular-expression serialization library.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/RegexSerDe.html
     */
    static readonly REGEXP: SerializationLibrary;
}
/**
 * The classification string assigned to tables that use a given data format.
 *
 * @see https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html#classifier-built-in
 */
export declare class ClassificationString {
    /**
     * The classification string itself.
     */
    readonly value: string;

    /**
     * @param value the classification string.
     */
    constructor(value: string);

    /**
     * Classification for Avro data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-avro
     */
    static readonly AVRO: ClassificationString;

    /**
     * Classification for CSV data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-csv
     */
    static readonly CSV: ClassificationString;

    /**
     * Classification for JSON data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-json
     */
    static readonly JSON: ClassificationString;

    /**
     * Classification for XML data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-xml
     */
    static readonly XML: ClassificationString;

    /**
     * Classification for Parquet data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-parquet
     */
    static readonly PARQUET: ClassificationString;

    /**
     * Classification for ORC data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-orc
     */
    static readonly ORC: ClassificationString;
}
/**
 * Construction properties accepted by `DataFormat`.
 */
export interface DataFormatProps {
    /**
     * The `InputFormat` this data format uses.
     */
    readonly inputFormat: InputFormat;

    /**
     * The `OutputFormat` this data format uses.
     */
    readonly outputFormat: OutputFormat;

    /**
     * The serialization library this data format uses.
     */
    readonly serializationLibrary: SerializationLibrary;

    /**
     * The classification string assigned to tables with this data format.
     *
     * @default - No classification is specified.
     */
    readonly classificationString?: ClassificationString;
}
/**
 * Groups the input format, output format and serialization library (SerDe)
 * that together describe a single data format.
 */
export declare class DataFormat {
    /**
     * Apache Web Server Logs. Also works for CloudFront logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/apache.html
     */
    static readonly APACHE_LOGS: DataFormat;

    /**
     * Apache Avro.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/avro.html
     */
    static readonly AVRO: DataFormat;

    /**
     * CloudTrail logs stored on S3.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL_LOGS: DataFormat;

    /**
     * CSV files.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/csv.html
     */
    static readonly CSV: DataFormat;

    /**
     * Plain text files in JSON format.
     * Uses the OpenX JSON SerDe for serialization and deserialization.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/json.html
     */
    static readonly JSON: DataFormat;

    /**
     * Logstash logs, using the GROK SerDe.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    static readonly LOGSTASH: DataFormat;

    /**
     * Apache ORC (Optimized Row Columnar).
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/orc.html
     */
    static readonly ORC: DataFormat;

    /**
     * Apache Parquet.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/parquet.html
     */
    static readonly PARQUET: DataFormat;

    /**
     * TSV (Tab-Separated Values).
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/lazy-simple-serde.html
     */
    static readonly TSV: DataFormat;

    /**
     * The `InputFormat` this data format uses.
     */
    readonly inputFormat: InputFormat;

    /**
     * The `OutputFormat` this data format uses.
     */
    readonly outputFormat: OutputFormat;

    /**
     * The serialization library this data format uses.
     */
    readonly serializationLibrary: SerializationLibrary;

    /**
     * The classification string assigned to tables with this data format.
     * Optional; may be undefined.
     */
    readonly classificationString?: ClassificationString;

    /**
     * @param props the formats, serialization library and optional
     * classification string for this data format.
     */
    constructor(props: DataFormatProps);
}