From 3e442345f1d18a51b490ec5e1120906c5d559cc7 Mon Sep 17 00:00:00 2001 From: dazou-exp <164045802+dazou-exp@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:47:28 -0500 Subject: [PATCH] feature: Added common_producer_iamroles to s3 bucket policy (#251) * added common_producer_iamroles to grant read-write access to all apiary managed schemas * fix whitespace issues * update description for common_producer_iamroles * adjusted permissions granted to common_producer_iamroles * adjusted permissions granted to common_producer_iamroles * bump version to 7.1.0 * added permissions to read-write object tagging * fix typo missing comma * fix typo extra comma --------- Co-authored-by: David Zou --- CHANGELOG.md | 4 + VARIABLES.md | 255 +++++++++++++++------------- s3.tf | 1 + templates/apiary-bucket-policy.json | 31 ++++ variables.tf | 6 + 5 files changed, 177 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1da8d0..ada126d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.0] - 2024-03-21 +### Added +- Added `common_producer_iamroles` to allow roles read-write access to all Apiary managed schemas. + ## [7.0.1] - 2024-01-22 ### Added - Added `datadog-agent` for HMS-Readonly and HMS-Readwrite in ECS. 
diff --git a/VARIABLES.md b/VARIABLES.md index dcdee63..3bd023c 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -2,122 +2,123 @@ ## Inputs -| Name | Description | Type | Default | Required | -|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| -| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. | `list(any)` | `[]` | no | -| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| Name | Description | Type | Default | Required | +|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| +| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. 
| `list(any)` | `[]` | no | +| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. | `list(string)` | `[]` | no | | apiary\_consumer\_prefix\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to certain prefixes in managed Apiary S3 buckets. See below section for more information and format. | `map(map(list(string)))` | `{}` | no | -| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. | `list(string)` | `[]` | no | -| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | -| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | -| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | -| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | -| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | -| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | -| apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | -| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | -| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | -| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | -| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | -| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | -| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | -| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | -| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | -| aws\_region | AWS region. | `string` | n/a | yes | -| dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | -| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | -| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. 
| `string` | n/a | yes | -| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | -| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | -| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | -| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | -| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | -| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | -| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | -| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManger secret name. | `string` | `""` | no | -| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | -| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | -| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | `string` | `""` | no | -| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | -| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | -| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format.Only applicable when module manages logs S3 bucket. | `bool` | `true` | no | -| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | -| enable\_data\_events | Enable managed buckets S3 event notifications. 
| `bool` | `false` | no | -| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | -| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | -| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | -| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | -| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | -| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | -| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | -| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | -| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | -| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | -| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | -| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | -| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | -| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. | `string` | `"ecs"` | no | -| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | -| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | -| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | -| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | -| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | -| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | -| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | -| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | -| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | -| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | -| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | -| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | -| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | -| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | -| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | -| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | -| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | -| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | -| private\_subnets | Private subnets. | `list(any)` | n/a | yes | -| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | -| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | -| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | -| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | -| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | -| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | -| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | -| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | -| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | -| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | -| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | -| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | -| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | -| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | -| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | -| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | -| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no | -| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | -| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | -| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | -| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | -| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | -| vpc\_id | VPC ID. | `string` | n/a | yes | -| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no | -| rds\_family | RDS Family | `string` | aurora5.6 | no | -| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no | -| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no | -| datadog_metrics_port | Port in which metrics will be send for Datadog | string | 8080 | no | -| datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no | -| datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. | string | 7.50.3-jmx | no | -| datadog_agent_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments. | string | false | no | +| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. 
| `list(string)` | `[]` | no | +| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | +| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | +| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | +| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | +| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | +| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| apiary\_log\_bucket | Bucket for Apiary logs. If this is blank, module will create a bucket. | `string` | `""` | no | +| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | +| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | +| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | +| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | +| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | +| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | +| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | +| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | +| aws\_region | AWS region. | `string` | n/a | yes | +| common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) read-write access to all managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| dashboard\_namespace | k8s namespace to deploy grafana dashboard. 
| `string` | `"monitoring"` | no | +| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | +| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | n/a | yes | +| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | +| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | +| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | +| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | +| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | +| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | +| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | +| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManager secret name. | `string` | `""` | no | +| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | +| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | +| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretsManager secret name. | `string` | `""` | no | +| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | +| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | +| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format. Only applicable when module manages logs S3 bucket. 
| `bool` | `true` | no | +| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | +| enable\_data\_events | Enable managed buckets S3 event notifications. | `bool` | `false` | no | +| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | +| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | +| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | +| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | +| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | +| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | +| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | +| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | +| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | +| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | +| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | +| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | +| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | +| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. 
| `string` | `"ecs"` | no | +| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | +| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | +| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | +| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | +| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | +| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | +| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | +| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | +| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | +| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | +| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | +| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | +| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | +| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | +| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | +| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | +| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | +| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | +| private\_subnets | Private subnets. | `list(any)` | n/a | yes | +| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | +| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | +| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | +| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | +| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | +| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | +| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | +| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | +| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | +| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | +| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | +| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | +| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | +| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | +| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | +| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | +| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no | +| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | +| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | +| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | +| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | +| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | +| vpc\_id | VPC ID. | `string` | n/a | yes | +| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no | +| rds\_family | RDS Family | `string` | aurora5.6 | no | +| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no | +| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no | +| datadog_metrics_port | Port on which metrics will be sent to Datadog | string | 8080 | no | +| datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no | +| datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. | string | 7.50.3-jmx | no | +| datadog_agent_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments. 
| `bool` | false | no | ### apiary_assume_roles @@ -230,15 +231,15 @@ apiary_consumer_prefix_iamroles = { ### apiary_customer_condition A string that defines a list of conditions that restrict which objects in an Apiary schema's S3 bucket may be read cross-account by accounts in the `customer_accounts` list. -The string is a semicolon-delimited list of comma-delimited strings that specify conditions that are valid in AWS S3 bucket policy +The string is a semicolon-delimited list of comma-delimited strings that specify conditions that are valid in AWS S3 bucket policy [Condition](https://docs.aws.amazon.com/AmazonS3/latest/userguide/amazon-s3-policy-keys.html) sections. This condition is applied to every Apiary schema's S3 bucket policy. An example entry to limit access to: - Only requests from certain VPC CIDR blocks - And only to objects that have: - - Either an S3 tag of `data-sensitivity=false` or - - An S3 tag of `data-type=image*` -looks like: + - Either an S3 tag of `data-sensitivity=false` or + - An S3 tag of `data-type=image*` + looks like: ``` apiary_customer_condition = <:role/", + "arn:aws:iam:::role/", + ... 
+] +``` \ No newline at end of file diff --git a/s3.tf b/s3.tf index 5c8ca14..c4bf078 100644 --- a/s3.tf +++ b/s3.tf @@ -25,6 +25,7 @@ locals { client_roles = replace(lookup(schema, "client_roles", ""), ",", "\",\"") governance_iamroles = join("\",\"", var.apiary_governance_iamroles) consumer_prefix_roles = lookup(var.apiary_consumer_prefix_iamroles, schema["schema_name"], {}) + common_producer_iamroles = join("\",\"", var.common_producer_iamroles) }) } } diff --git a/templates/apiary-bucket-policy.json b/templates/apiary-bucket-policy.json index cf75053..da4422d 100644 --- a/templates/apiary-bucket-policy.json +++ b/templates/apiary-bucket-policy.json @@ -150,6 +150,37 @@ ] }, %{endif} +%{if common_producer_iamroles != ""} + { + "Sid": "General read-write iamrole permissions", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetBucketLocation", + "s3:GetObject", + "s3:GetObjectAcl", + "s3:GetBucketAcl", + "s3:GetObjectTagging", + "s3:ListBucket", + "s3:PutObject", + "s3:PutObjectAcl", + "s3:PutObjectTagging", + "s3:DeleteObject", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:ObjectOwnerOverrideToBucketOwner" + ], + "Resource": [ + "arn:aws:s3:::${bucket_name}", + "arn:aws:s3:::${bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:PrincipalArn": [ "${common_producer_iamroles}" ] + } + } + }, +%{endif} %{if governance_iamroles != ""} { "Sid": "Apiary governance iamrole permissions", diff --git a/variables.tf b/variables.tf index d0b90fe..c426ab7 100644 --- a/variables.tf +++ b/variables.tf @@ -733,3 +733,9 @@ variable "datadog_agent_enabled" { type = bool default = false } + +variable "common_producer_iamroles" { + description = "AWS IAM roles allowed read-write access to managed Apiary S3 buckets." + type = list(string) + default = [] +} \ No newline at end of file