UNPKG

@aws-cdk/aws-glue-alpha

Version:

The CDK Construct Library for AWS::Glue

254 lines (253 loc) 9.45 kB
/**
 * Absolute class name of the Hadoop `InputFormat` to use when reading table files.
 */
export declare class InputFormat {
    /**
     * Absolute class name of the Hadoop `InputFormat`.
     */
    readonly className: string;
    /**
     * InputFormat for Avro files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerInputFormat.html
     */
    static readonly AVRO: InputFormat;
    /**
     * InputFormat for Cloudtrail Logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL: InputFormat;
    /**
     * InputFormat for Orc files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.html
     */
    static readonly ORC: InputFormat;
    /**
     * InputFormat for Parquet files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.html
     */
    static readonly PARQUET: InputFormat;
    /**
     * An InputFormat for plain text files. Files are broken into lines. Either linefeed or
     * carriage-return are used to signal end of line. Keys are the position in the file, and
     * values are the line of text.
     * JSON & CSV files are examples of this InputFormat
     *
     * @see https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/TextInputFormat.html
     */
    static readonly TEXT: InputFormat;
    /**
     * @param className Absolute class name of the Hadoop `InputFormat`.
     */
    constructor(className: string);
}
/**
 * Absolute class name of the Hadoop `OutputFormat` to use when writing table files.
 */
export declare class OutputFormat {
    /**
     * Absolute class name of the Hadoop `OutputFormat`.
     */
    readonly className: string;
    /**
     * Writes text data with a null key (value only).
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.html
     */
    static readonly HIVE_IGNORE_KEY_TEXT: OutputFormat;
    /**
     * OutputFormat for Avro files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.html
     */
    static readonly AVRO: OutputFormat;
    /**
     * OutputFormat for Orc files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.html
     */
    static readonly ORC: OutputFormat;
    /**
     * OutputFormat for Parquet files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.html
     */
    static readonly PARQUET: OutputFormat;
    /**
     * @param className Absolute class name of the Hadoop `OutputFormat`.
     */
    constructor(className: string);
}
/**
 * Serialization library to use when serializing/deserializing (SerDe) table records.
 *
 * @see https://cwiki.apache.org/confluence/display/Hive/SerDe
 */
export declare class SerializationLibrary {
    /**
     * Class name of the serialization library.
     */
    readonly className: string;
    /**
     * Hive `AvroSerDe`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/avro/AvroSerDe.html
     */
    static readonly AVRO: SerializationLibrary;
    /**
     * SerDe for CloudTrail logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL: SerializationLibrary;
    /**
     * Grok SerDe.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    static readonly GROK: SerializationLibrary;
    /**
     * Hive HCatalog `JsonSerDe`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hive/hcatalog/data/JsonSerDe.html
     */
    static readonly HIVE_JSON: SerializationLibrary;
    /**
     * Hive `LazySimpleSerDe`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.html
     */
    static readonly LAZY_SIMPLE: SerializationLibrary;
    /**
     * Hive `OpenCSVSerde`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/OpenCSVSerde.html
     */
    static readonly OPEN_CSV: SerializationLibrary;
    /**
     * OpenX JSON SerDe.
     *
     * @see https://github.com/rcongiu/Hive-JSON-Serde
     */
    static readonly OPENX_JSON: SerializationLibrary;
    /**
     * Hive `OrcSerde`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcSerde.html
     */
    static readonly ORC: SerializationLibrary;
    /**
     * Hive `ParquetHiveSerDe`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.html
     */
    static readonly PARQUET: SerializationLibrary;
    /**
     * Hive `RegexSerDe`.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/RegexSerDe.html
     */
    static readonly REGEXP: SerializationLibrary;
    /**
     * @param className Class name of the serialization library.
     */
    constructor(className: string);
}
/**
 * Classification string given to tables with this data format.
 *
 * @see https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html#classifier-built-in
 */
export declare class ClassificationString {
    /**
     * The classification string.
     */
    readonly value: string;
    /**
     * Classification for Avro data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-avro
     */
    static readonly AVRO: ClassificationString;
    /**
     * Classification for CSV data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-csv
     */
    static readonly CSV: ClassificationString;
    /**
     * Classification for JSON data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-json
     */
    static readonly JSON: ClassificationString;
    /**
     * Classification for XML data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-xml
     */
    static readonly XML: ClassificationString;
    /**
     * Classification for Parquet data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-parquet
     */
    static readonly PARQUET: ClassificationString;
    /**
     * Classification for ORC data.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-orc
     */
    static readonly ORC: ClassificationString;
    /**
     * @param value The classification string.
     */
    constructor(value: string);
}
/**
 * Properties of a DataFormat instance.
 */
export interface DataFormatProps {
    /**
     * `InputFormat` for this data format.
     */
    readonly inputFormat: InputFormat;
    /**
     * `OutputFormat` for this data format.
     */
    readonly outputFormat: OutputFormat;
    /**
     * Serialization library for this data format.
     */
    readonly serializationLibrary: SerializationLibrary;
    /**
     * Classification string given to tables with this data format.
     *
     * @default - No classification is specified.
     */
    readonly classificationString?: ClassificationString;
}
/**
 * Defines the input/output formats and ser/de for a single DataFormat.
 */
export declare class DataFormat {
    /**
     * DataFormat for Apache Web Server Logs. Also works for CloudFront logs
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/apache.html
     */
    static readonly APACHE_LOGS: DataFormat;
    /**
     * DataFormat for Apache Avro
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/avro.html
     */
    static readonly AVRO: DataFormat;
    /**
     * DataFormat for CloudTrail logs stored on S3
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    static readonly CLOUDTRAIL_LOGS: DataFormat;
    /**
     * DataFormat for CSV Files
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/csv.html
     */
    static readonly CSV: DataFormat;
    /**
     * Stored as plain text files in JSON format.
     * Uses OpenX Json SerDe for serialization and deserialization.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/json.html
     */
    static readonly JSON: DataFormat;
    /**
     * DataFormat for Logstash Logs, using the GROK SerDe
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    static readonly LOGSTASH: DataFormat;
    /**
     * DataFormat for Apache ORC (Optimized Row Columnar)
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/orc.html
     */
    static readonly ORC: DataFormat;
    /**
     * DataFormat for Apache Parquet
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/parquet.html
     */
    static readonly PARQUET: DataFormat;
    /**
     * DataFormat for TSV (Tab-Separated Values)
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/lazy-simple-serde.html
     */
    static readonly TSV: DataFormat;
    /**
     * `InputFormat` for this data format.
     */
    readonly inputFormat: InputFormat;
    /**
     * `OutputFormat` for this data format.
     */
    readonly outputFormat: OutputFormat;
    /**
     * Serialization library for this data format.
     */
    readonly serializationLibrary: SerializationLibrary;
    /**
     * Classification string given to tables with this data format.
     *
     * @default - No classification is specified.
     */
    readonly classificationString?: ClassificationString;
    /**
     * @param props The input format, output format, serialization library and
     * optional classification string that make up this data format.
     */
    constructor(props: DataFormatProps);
}