@aws-cdk/aws-glue-alpha
Version:
The CDK Construct Library for AWS::Glue
295 lines • 34.2 kB
JavaScript
"use strict";
var _a, _b, _c, _d, _e;
Object.defineProperty(exports, "__esModule", { value: true });
exports.DataFormat = exports.ClassificationString = exports.SerializationLibrary = exports.OutputFormat = exports.InputFormat = void 0;
const jsiiDeprecationWarnings = require("../.warnings.jsii.js");
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
/**
 * Wraps the absolute class name of the Hadoop `InputFormat` used when reading table files.
 */
class InputFormat {
    /**
     * @param className value stored verbatim on the instance as `className`
     */
    constructor(className) {
        this.className = className;
    }
}
exports.InputFormat = InputFormat;
_a = JSII_RTTI_SYMBOL_1;
InputFormat[_a] = {
    fqn: "@aws-cdk/aws-glue-alpha.InputFormat",
    version: "2.218.0-alpha.0",
};
// Predefined input formats, attached to the class as static members in declaration order.
Object.assign(InputFormat, {
    /**
     * InputFormat for Avro files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerInputFormat.html
     */
    AVRO: new InputFormat('org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'),
    /**
     * InputFormat for CloudTrail logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    CLOUDTRAIL: new InputFormat('com.amazon.emr.cloudtrail.CloudTrailInputFormat'),
    /**
     * InputFormat for ORC files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.html
     */
    ORC: new InputFormat('org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'),
    /**
     * InputFormat for Parquet files.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.html
     */
    PARQUET: new InputFormat('org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'),
    /**
     * InputFormat for plain text files. Files are split into lines, with either a linefeed
     * or a carriage return marking the end of a line. Keys are positions in the file and
     * values are the lines of text. JSON and CSV files are examples of this InputFormat.
     *
     * @see https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/TextInputFormat.html
     */
    TEXT: new InputFormat('org.apache.hadoop.mapred.TextInputFormat'),
});
/**
 * Wraps the absolute class name of the Hadoop `OutputFormat` used when writing table files.
 */
class OutputFormat {
    /**
     * @param className value stored verbatim on the instance as `className`
     */
    constructor(className) {
        this.className = className;
    }
}
exports.OutputFormat = OutputFormat;
_b = JSII_RTTI_SYMBOL_1;
OutputFormat[_b] = { fqn: "@aws-cdk/aws-glue-alpha.OutputFormat", version: "2.218.0-alpha.0" };
// Every predefined constant below is constructed with `OutputFormat` itself, so that
// `OutputFormat.X instanceof OutputFormat` holds for all of them.
/**
 * Writes text data with a null key (value only).
 *
 * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.html
 */
OutputFormat.HIVE_IGNORE_KEY_TEXT = new OutputFormat('org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat');
/**
 * OutputFormat for Avro files.
 *
 * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.html
 */
OutputFormat.AVRO = new OutputFormat('org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat');
/**
 * OutputFormat for ORC files.
 *
 * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.html
 */
OutputFormat.ORC = new OutputFormat('org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat');
/**
 * OutputFormat for Parquet files.
 *
 * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.html
 */
OutputFormat.PARQUET = new OutputFormat('org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat');
/**
 * Wraps the class name of the serialization library (SerDe) used to serialize and
 * deserialize table records.
 *
 * @see https://cwiki.apache.org/confluence/display/Hive/SerDe
 */
class SerializationLibrary {
    /**
     * @param className value stored verbatim on the instance as `className`
     */
    constructor(className) {
        this.className = className;
    }
}
exports.SerializationLibrary = SerializationLibrary;
_c = JSII_RTTI_SYMBOL_1;
SerializationLibrary[_c] = {
    fqn: "@aws-cdk/aws-glue-alpha.SerializationLibrary",
    version: "2.218.0-alpha.0",
};
// Predefined SerDe libraries, attached to the class as static members in declaration order.
Object.assign(SerializationLibrary, {
    /**
     * Avro SerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/avro/AvroSerDe.html
     */
    AVRO: new SerializationLibrary('org.apache.hadoop.hive.serde2.avro.AvroSerDe'),
    /**
     * CloudTrail SerDe.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    CLOUDTRAIL: new SerializationLibrary('com.amazon.emr.hive.serde.CloudTrailSerde'),
    /**
     * Grok SerDe.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    GROK: new SerializationLibrary('com.amazonaws.glue.serde.GrokSerDe'),
    /**
     * HCatalog JSON SerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hive/hcatalog/data/JsonSerDe.html
     */
    HIVE_JSON: new SerializationLibrary('org.apache.hive.hcatalog.data.JsonSerDe'),
    /**
     * LazySimpleSerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.html
     */
    LAZY_SIMPLE: new SerializationLibrary('org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
    /**
     * OpenCSVSerde.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/OpenCSVSerde.html
     */
    OPEN_CSV: new SerializationLibrary('org.apache.hadoop.hive.serde2.OpenCSVSerde'),
    /**
     * OpenX JSON SerDe.
     *
     * @see https://github.com/rcongiu/Hive-JSON-Serde
     */
    OPENX_JSON: new SerializationLibrary('org.openx.data.jsonserde.JsonSerDe'),
    /**
     * ORC SerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/orc/OrcSerde.html
     */
    ORC: new SerializationLibrary('org.apache.hadoop.hive.ql.io.orc.OrcSerde'),
    /**
     * Parquet Hive SerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.html
     */
    PARQUET: new SerializationLibrary('org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'),
    /**
     * Regex SerDe.
     *
     * @see https://svn.apache.org/repos/infra/websites/production/hive/content/javadocs/r3.1.3/api/org/apache/hadoop/hive/serde2/RegexSerDe.html
     */
    REGEXP: new SerializationLibrary('org.apache.hadoop.hive.serde2.RegexSerDe'),
});
/**
 * Wraps the classification string given to tables that use a particular data format.
 *
 * @see https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html#classifier-built-in
 */
class ClassificationString {
    /**
     * @param value value stored verbatim on the instance as `value`
     */
    constructor(value) {
        this.value = value;
    }
}
exports.ClassificationString = ClassificationString;
_d = JSII_RTTI_SYMBOL_1;
ClassificationString[_d] = {
    fqn: "@aws-cdk/aws-glue-alpha.ClassificationString",
    version: "2.218.0-alpha.0",
};
// Predefined classification strings, attached to the class as static members in declaration order.
Object.assign(ClassificationString, {
    /**
     * Classification `avro`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-avro
     */
    AVRO: new ClassificationString('avro'),
    /**
     * Classification `csv`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-csv
     */
    CSV: new ClassificationString('csv'),
    /**
     * Classification `json`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-json
     */
    JSON: new ClassificationString('json'),
    /**
     * Classification `xml`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-xml
     */
    XML: new ClassificationString('xml'),
    /**
     * Classification `parquet`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-parquet
     */
    PARQUET: new ClassificationString('parquet'),
    /**
     * Classification `orc`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-orc
     */
    ORC: new ClassificationString('orc'),
});
/**
 * Bundles the input format, output format, SerDe library and (optional) classification
 * string that together describe a single data format.
 */
class DataFormat {
    /**
     * @param props object providing `inputFormat`, `outputFormat`, `serializationLibrary`
     *   and `classificationString`; each is copied onto the instance as-is
     * @throws whatever the jsii deprecation check throws for `props`
     */
    constructor(props) {
        try {
            jsiiDeprecationWarnings._aws_cdk_aws_glue_alpha_DataFormatProps(props);
        }
        catch (error) {
            // Outside of JSII debug mode, trim the stack of deprecation errors so that it
            // starts at the caller of this constructor.
            const debugging = process.env.JSII_DEBUG === "1";
            if (!debugging && error.name === "DeprecationError") {
                Error.captureStackTrace(error, DataFormat);
            }
            throw error;
        }
        const { inputFormat, outputFormat, serializationLibrary, classificationString } = props;
        this.inputFormat = inputFormat;
        this.outputFormat = outputFormat;
        this.serializationLibrary = serializationLibrary;
        this.classificationString = classificationString;
    }
}
exports.DataFormat = DataFormat;
_e = JSII_RTTI_SYMBOL_1;
DataFormat[_e] = {
    fqn: "@aws-cdk/aws-glue-alpha.DataFormat",
    version: "2.218.0-alpha.0",
};
// Predefined data formats, attached to the class as static members in declaration order.
Object.assign(DataFormat, {
    /**
     * DataFormat for Apache Web Server Logs. Also works for CloudFront logs.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/apache.html
     */
    APACHE_LOGS: new DataFormat({
        inputFormat: InputFormat.TEXT,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.REGEXP,
    }),
    /**
     * DataFormat for Apache Avro.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/avro.html
     */
    AVRO: new DataFormat({
        inputFormat: InputFormat.AVRO,
        outputFormat: OutputFormat.AVRO,
        serializationLibrary: SerializationLibrary.AVRO,
        classificationString: ClassificationString.AVRO,
    }),
    /**
     * DataFormat for CloudTrail logs stored on S3.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
     */
    CLOUDTRAIL_LOGS: new DataFormat({
        inputFormat: InputFormat.CLOUDTRAIL,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.CLOUDTRAIL,
    }),
    /**
     * DataFormat for CSV files.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/csv.html
     */
    CSV: new DataFormat({
        inputFormat: InputFormat.TEXT,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.OPEN_CSV,
        classificationString: ClassificationString.CSV,
    }),
    /**
     * Stored as plain text files in JSON format.
     * Uses the OpenX JSON SerDe for serialization and deserialization.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/json.html
     */
    JSON: new DataFormat({
        inputFormat: InputFormat.TEXT,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.OPENX_JSON,
        classificationString: ClassificationString.JSON,
    }),
    /**
     * DataFormat for Logstash logs, using the Grok SerDe.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
     */
    LOGSTASH: new DataFormat({
        inputFormat: InputFormat.TEXT,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.GROK,
    }),
    /**
     * DataFormat for Apache ORC (Optimized Row Columnar).
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/orc.html
     */
    ORC: new DataFormat({
        inputFormat: InputFormat.ORC,
        outputFormat: OutputFormat.ORC,
        serializationLibrary: SerializationLibrary.ORC,
        classificationString: ClassificationString.ORC,
    }),
    /**
     * DataFormat for Apache Parquet.
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/parquet.html
     */
    PARQUET: new DataFormat({
        inputFormat: InputFormat.PARQUET,
        outputFormat: OutputFormat.PARQUET,
        serializationLibrary: SerializationLibrary.PARQUET,
        classificationString: ClassificationString.PARQUET,
    }),
    /**
     * DataFormat for TSV (Tab-Separated Values).
     *
     * @see https://docs.aws.amazon.com/athena/latest/ug/lazy-simple-serde.html
     */
    TSV: new DataFormat({
        inputFormat: InputFormat.TEXT,
        outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
        serializationLibrary: SerializationLibrary.LAZY_SIMPLE,
    }),
});
//# sourceMappingURL=data:application/json;base64,