diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c8ff53..a9a08cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +0.0.6 (2018-10-14) +================== + +* [Deprecated] Add warning messages for **keep_metadata** and **save_mode** to the `athena.query>` operator. + 0.0.5 (2018-09-23) ================== diff --git a/README.md b/README.md index 051872b..3a2bd91 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ _export: repositories: - https://jitpack.io dependencies: - - pro.civitaspo:digdag-operator-athena:0.0.5 + - pro.civitaspo:digdag-operator-athena:0.0.6 athena: auth_method: profile @@ -86,11 +86,13 @@ Define the below options on properties (which is indicated by `-c`, `--config`). - **output**: The location in Amazon S3 where your query results are stored, such as `"s3://path/to/query/"`. For more information, see [Queries and Query Result Files](https://docs.aws.amazon.com/athena/latest/ug/querying.html). (string, required) - **keep_metadata**: Indicate whether to keep the metadata after executing the query. (boolean, default: `false`) - **NOTE**: If **keep_metadata** is false, `athena.preview>` operator cannot be used except in this time, because athena [`GetQueryResults API`](https://docs.aws.amazon.com/athena/latest/APIReference/API_GetQueryResults.html) requires metadata. + - **NOTE**: [Athena supports CTAS](https://aws.amazon.com/jp/about-aws/whats-new/2018/10/athena_ctas_support/), so digdag-operator-athena will support it as the `athena.ctas>` operator. After that, the **keep_metadata** option will be removed and the default behaviour will become the same as `keep_metadata: true` (the current default behaviour is the same as `keep_metadata: false`). This option was only added because the metadata file gets in the way when the output CSV is used as another table. - **save_mode**: Specify the expected behavior of saving the query results. Available values are `"append"`, `"error_if_exists"`, `"ignore"`, `"overwrite"`. 
See the below explanation of the behaviour. (string, default: `"overwrite"`) - `"append"`: When saving the query results, even if other CSVs already exist, the query results are expected to be saved as another CSV. - `"error_if_exists"`: When saving the query results, if other CSVs already exists, an exception is expected to be thrown. - `"ignore"`: When saving the query results, if other CSVs already exists, the save operation is expected to not save the query results and to not change the existing data. - `"overwrite"`: When saving the query results, if other CSVs already exist, existing data is expected to be overwritten by the query results. This operation is not atomic. + - **NOTE**: [Athena supports CTAS](https://aws.amazon.com/jp/about-aws/whats-new/2018/10/athena_ctas_support/), so digdag-operator-athena will support it as the `athena.ctas>` operator. After that, the **save_mode** option will be removed and the behaviour will become the same as `save_mode: append` (the current default behaviour is the same as `save_mode: overwrite`). This option was only added because the many duplicated output CSV files created by other executions sometimes get in the way when the output CSV is used as another table. - **timeout**: Specify timeout period. (`DurationParam`, default: `"10m"`) - **preview**: Call `athena.preview>` operator after run `athena.query>`. (boolean, default: `true`) - **NOTE**: If **keep_metadata** is false, `athena.preview>` operator cannot be used except in this time, because athena [`GetQueryResults API`](https://docs.aws.amazon.com/athena/latest/APIReference/API_GetQueryResults.html) requires metadata. 
diff --git a/build.gradle b/build.gradle index 55c1931..3c061ca 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ plugins { } group = 'pro.civitaspo' -version = '0.0.5' +version = '0.0.6' def digdagVersion = '0.9.27' def awsSdkVersion = "1.11.372" diff --git a/example/example.dig b/example/example.dig index 8e702f2..d9b36a9 100644 --- a/example/example.dig +++ b/example/example.dig @@ -4,7 +4,7 @@ _export: - file://${repos} # - https://jitpack.io dependencies: - - pro.civitaspo:digdag-operator-athena:0.0.5 + - pro.civitaspo:digdag-operator-athena:0.0.6 athena: auth_method: profile query: diff --git a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala index 3c11d3a..f1d1ff5 100644 --- a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala +++ b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala @@ -91,8 +91,22 @@ class AthenaQueryOperator(operatorName: String, context: OperatorContext, system val o = params.get("output", classOf[String]) AmazonS3URI(if (o.endsWith("/")) o else s"$o/") } - protected val keepMetadata: Boolean = params.get("keep_metadata", classOf[Boolean], false) - protected val saveMode: SaveMode = SaveMode(params.get("save_mode", classOf[String], "overwrite")) + + /** Value of the "keep_metadata" option (default: false); a deprecation warning is logged when this val is initialised. */ + @deprecated("The 'keep_metadata' option will be removed once the athena.ctas> operator is supported.", "0.0.6") protected val keepMetadata: Boolean = { + logger.warn( + "Athena supports CTAS, so digdag-operator-athena will support it as `athena.ctas>` operator. After that, 'keep_metadata' option will be removed and the default behaviour will become the same as `keep_metadata: true` (the current default behaviour is the same as `keep_metadata: false`) because this option was added for that the metadata file is obstructive when using the output csv as another table." 
+ ) + params.get("keep_metadata", classOf[Boolean], false) + } + + /** Value of the "save_mode" option (default: "overwrite") converted via SaveMode(...); a deprecation warning is logged when this val is initialised. */ + @deprecated("The 'save_mode' option will be removed once the athena.ctas> operator is supported.", "0.0.6") protected val saveMode: SaveMode = { + logger.warn( + "Athena supports CTAS, so digdag-operator-athena will support it as `athena.ctas>` operator. After that, 'save_mode' option will be removed and the behaviour will become the same as `save_mode: append` (the current default behaviour is the same as `save_mode: overwrite`) because this option was added for that lots of duplicated output csv files which are created by other executions are sometimes obstructive when using the output csv as another table." + ) + SaveMode(params.get("save_mode", classOf[String], "overwrite")) + } protected val timeout: DurationParam = params.get("timeout", classOf[DurationParam], DurationParam.parse("10m")) protected val preview: Boolean = params.get("preview", classOf[Boolean], true)