From a8a900d538f6e4815c3b7c130f871b9029c86ad2 Mon Sep 17 00:00:00 2001 From: Kaiwalya Joshi Date: Wed, 9 Jan 2019 11:14:27 -0500 Subject: [PATCH] DCOS-46811: Configurable RLIMITs for RLIMIT_NOFILE (#2873) * Make per-pod POSIX RLIMIT_NOFILE limits configurable for Elasticsearch. * Make per-pod POSIX RLIMIT_NOFILE limits configurable for Apache Cassandra. * Make per-pod POSIX RLIMIT_NOFILE limits configurable for Apache HDFS. * Make per-pod POSIX RLIMIT_NOFILE limits configurable for Hello-World. * Conform env-vars to Linux naming convention i.e. RLIMIT_NOFILE as opposed to the previous RLIMITS_NOFILE. --- frameworks/cassandra/src/main/dist/svc.yml | 4 + frameworks/cassandra/universe/config.json | 26 ++++- .../cassandra/universe/marathon.json.mustache | 5 +- frameworks/elastic/src/main/dist/svc.yml | 16 +-- frameworks/elastic/universe/config.json | 98 ++++++++++++++++++- .../elastic/universe/marathon.json.mustache | 14 ++- frameworks/hdfs/src/main/dist/svc.yml | 12 +++ frameworks/hdfs/universe/config.json | 72 ++++++++++++++ .../hdfs/universe/marathon.json.mustache | 9 ++ frameworks/helloworld/src/main/dist/svc.yml | 8 ++ frameworks/helloworld/universe/config.json | 48 +++++++++ .../universe/marathon.json.mustache | 8 +- 12 files changed, 307 insertions(+), 13 deletions(-) diff --git a/frameworks/cassandra/src/main/dist/svc.yml b/frameworks/cassandra/src/main/dist/svc.yml index 3daa4843240..219050002fa 100644 --- a/frameworks/cassandra/src/main/dist/svc.yml +++ b/frameworks/cassandra/src/main/dist/svc.yml @@ -21,6 +21,10 @@ pods: - {{CASSANDRA_OPENSSL_URI}} - {{CASSANDRA_URI}} - {{BOOTSTRAP_URI}} + rlimits: + RLIMIT_NOFILE: + soft: {{RLIMIT_NOFILE_SOFT}} + hard: {{RLIMIT_NOFILE_HARD}} resource-sets: server-resources: cpus: {{CASSANDRA_CPUS}} diff --git a/frameworks/cassandra/universe/config.json b/frameworks/cassandra/universe/config.json index 09900b3eac4..3fb781c2c16 100644 --- a/frameworks/cassandra/universe/config.json +++ 
b/frameworks/cassandra/universe/config.json @@ -132,7 +132,31 @@ "minimum": 60 } } - } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } + } }, "required": [ "name", diff --git a/frameworks/cassandra/universe/marathon.json.mustache b/frameworks/cassandra/universe/marathon.json.mustache index 6e9c0928b7d..2c659364e6c 100644 --- a/frameworks/cassandra/universe/marathon.json.mustache +++ b/frameworks/cassandra/universe/marathon.json.mustache @@ -173,7 +173,10 @@ "READINESS_CHECK_INTERVAL": "{{service.readiness_check.interval}}", "READINESS_CHECK_DELAY": "{{service.readiness_check.delay}}", - "READINESS_CHECK_TIMEOUT": "{{service.readiness_check.timeout}}" + "READINESS_CHECK_TIMEOUT": "{{service.readiness_check.timeout}}", + + "RLIMIT_NOFILE_SOFT": "{{service.rlimits.rlimit_nofile.soft}}", + "RLIMIT_NOFILE_HARD": "{{service.rlimits.rlimit_nofile.hard}}" }, "fetch": [ { "uri": "{{resource.assets.uris.bootstrap-zip}}", "cache": true }, diff --git a/frameworks/elastic/src/main/dist/svc.yml b/frameworks/elastic/src/main/dist/svc.yml index 9fedfb2c302..b7cafa8b9b7 100644 --- a/frameworks/elastic/src/main/dist/svc.yml +++ b/frameworks/elastic/src/main/dist/svc.yml @@ -18,8 +18,8 @@ pods: - {{STATSD_URI}} rlimits: RLIMIT_NOFILE: - soft: 128000 - hard: 128000 + soft: 
{{MASTER_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{MASTER_NODE_RLIMIT_NOFILE_HARD}} placement: '{{{MASTER_NODE_PLACEMENT}}}' tasks: node: @@ -108,8 +108,8 @@ pods: - {{STATSD_URI}} rlimits: RLIMIT_NOFILE: - soft: 128000 - hard: 128000 + soft: {{DATA_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{DATA_NODE_RLIMIT_NOFILE_HARD}} placement: '{{{DATA_NODE_PLACEMENT}}}' tasks: node: @@ -196,8 +196,8 @@ pods: - {{STATSD_URI}} rlimits: RLIMIT_NOFILE: - soft: 128000 - hard: 128000 + soft: {{INGEST_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{INGEST_NODE_RLIMIT_NOFILE_HARD}} placement: '{{{INGEST_NODE_PLACEMENT}}}' tasks: node: @@ -284,8 +284,8 @@ pods: - {{STATSD_URI}} rlimits: RLIMIT_NOFILE: - soft: 128000 - hard: 128000 + soft: {{COORDINATOR_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{COORDINATOR_NODE_RLIMIT_NOFILE_HARD}} placement: '{{{COORDINATOR_NODE_PLACEMENT}}}' tasks: node: diff --git a/frameworks/elastic/universe/config.json b/frameworks/elastic/universe/config.json index 4d0451330c7..2fb483ec79f 100644 --- a/frameworks/elastic/universe/config.json +++ b/frameworks/elastic/universe/config.json @@ -182,6 +182,30 @@ "minimum": 10 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. 
Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -267,6 +291,30 @@ "minimum": 10 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -352,6 +400,30 @@ "minimum": 10 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. 
Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -437,7 +509,31 @@ "minimum": 10 } } - } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } + } }, "required": [ "cpus", diff --git a/frameworks/elastic/universe/marathon.json.mustache b/frameworks/elastic/universe/marathon.json.mustache index 10abb9f3d13..b93b45a8e1f 100644 --- a/frameworks/elastic/universe/marathon.json.mustache +++ b/frameworks/elastic/universe/marathon.json.mustache @@ -334,17 +334,29 @@ "MASTER_NODE_READINESS_CHECK_DELAY": "{{master_nodes.readiness_check.delay}}", "MASTER_NODE_READINESS_CHECK_TIMEOUT": "{{master_nodes.readiness_check.timeout}}", + 
"MASTER_NODE_RLIMIT_NOFILE_SOFT": "{{master_nodes.rlimits.rlimit_nofile.soft}}", + "MASTER_NODE_RLIMIT_NOFILE_HARD": "{{master_nodes.rlimits.rlimit_nofile.hard}}", + "DATA_NODE_READINESS_CHECK_INTERVAL": "{{data_nodes.readiness_check.interval}}", "DATA_NODE_READINESS_CHECK_DELAY": "{{data_nodes.readiness_check.delay}}", "DATA_NODE_READINESS_CHECK_TIMEOUT": "{{data_nodes.readiness_check.timeout}}", + "DATA_NODE_RLIMIT_NOFILE_SOFT": "{{data_nodes.rlimits.rlimit_nofile.soft}}", + "DATA_NODE_RLIMIT_NOFILE_HARD": "{{data_nodes.rlimits.rlimit_nofile.hard}}", + "INGEST_NODE_READINESS_CHECK_INTERVAL": "{{ingest_nodes.readiness_check.interval}}", "INGEST_NODE_READINESS_CHECK_DELAY": "{{ingest_nodes.readiness_check.delay}}", "INGEST_NODE_READINESS_CHECK_TIMEOUT": "{{ingest_nodes.readiness_check.timeout}}", + "INGEST_NODE_RLIMIT_NOFILE_SOFT": "{{ingest_nodes.rlimits.rlimit_nofile.soft}}", + "INGEST_NODE_RLIMIT_NOFILE_HARD": "{{ingest_nodes.rlimits.rlimit_nofile.hard}}", + "COORDINATOR_NODE_READINESS_CHECK_INTERVAL": "{{coordinator_nodes.readiness_check.interval}}", "COORDINATOR_NODE_READINESS_CHECK_DELAY": "{{coordinator_nodes.readiness_check.delay}}", - "COORDINATOR_NODE_READINESS_CHECK_TIMEOUT": "{{coordinator_nodes.readiness_check.timeout}}" + "COORDINATOR_NODE_READINESS_CHECK_TIMEOUT": "{{coordinator_nodes.readiness_check.timeout}}", + + "COORDINATOR_NODE_RLIMIT_NOFILE_SOFT": "{{coordinator_nodes.rlimits.rlimit_nofile.soft}}", + "COORDINATOR_NODE_RLIMIT_NOFILE_HARD": "{{coordinator_nodes.rlimits.rlimit_nofile.hard}}" }, "fetch": [ { "uri": "{{resource.assets.uris.bootstrap-zip}}", "cache": true }, diff --git a/frameworks/hdfs/src/main/dist/svc.yml b/frameworks/hdfs/src/main/dist/svc.yml index d0293adde15..6e81354ad49 100644 --- a/frameworks/hdfs/src/main/dist/svc.yml +++ b/frameworks/hdfs/src/main/dist/svc.yml @@ -10,6 +10,10 @@ pods: - {{HDFS_BIN_URI}} - {{HDFS_JAVA_URI}} - {{BOOTSTRAP_URI}} + rlimits: + RLIMIT_NOFILE: + soft: {{JOURNAL_NODE_RLIMIT_NOFILE_SOFT}} + hard: 
{{JOURNAL_NODE_RLIMIT_NOFILE_HARD}} {{#SECURITY_KERBEROS_ENABLED}} secrets: keytab: @@ -146,6 +150,10 @@ pods: - {{HDFS_JAVA_URI}} - {{BOOTSTRAP_URI}} - {{ZONE_RESOLVER}} + rlimits: + RLIMIT_NOFILE: + soft: {{NAME_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{NAME_NODE_RLIMIT_NOFILE_HARD}} {{#SECURITY_KERBEROS_ENABLED}} secrets: keytab: @@ -413,6 +421,10 @@ pods: - {{HDFS_BIN_URI}} - {{HDFS_JAVA_URI}} - {{BOOTSTRAP_URI}} + rlimits: + RLIMIT_NOFILE: + soft: {{DATA_NODE_RLIMIT_NOFILE_SOFT}} + hard: {{DATA_NODE_RLIMIT_NOFILE_HARD}} {{#SECURITY_KERBEROS_ENABLED}} secrets: keytab: diff --git a/frameworks/hdfs/universe/config.json b/frameworks/hdfs/universe/config.json index dece3b93fb9..e1acc930953 100644 --- a/frameworks/hdfs/universe/config.json +++ b/frameworks/hdfs/universe/config.json @@ -210,6 +210,30 @@ "type": "string", "description": "JVM options to specify when running the journal node. This overrides HADOOP_HEAPSIZE Xmx value for the journal nodes.", "default": "" + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -589,6 +613,30 @@ "minimum": 180 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. 
Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -861,6 +909,30 @@ "minimum": 60 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ diff --git a/frameworks/hdfs/universe/marathon.json.mustache b/frameworks/hdfs/universe/marathon.json.mustache index 3c35f0afa57..5a939898632 100644 --- a/frameworks/hdfs/universe/marathon.json.mustache +++ b/frameworks/hdfs/universe/marathon.json.mustache @@ -109,6 +109,15 @@ "DATA_NODE_READINESS_CHECK_INTERVAL": "{{data_node.readiness_check.interval}}", "DATA_NODE_READINESS_CHECK_TIMEOUT": "{{data_node.readiness_check.timeout}}", + "JOURNAL_NODE_RLIMIT_NOFILE_SOFT" 
: "{{journal_node.rlimits.rlimit_nofile.soft}}", + "JOURNAL_NODE_RLIMIT_NOFILE_HARD" : "{{journal_node.rlimits.rlimit_nofile.hard}}", + + "NAME_NODE_RLIMIT_NOFILE_SOFT" : "{{name_node.rlimits.rlimit_nofile.soft}}", + "NAME_NODE_RLIMIT_NOFILE_HARD" : "{{name_node.rlimits.rlimit_nofile.hard}}", + + "DATA_NODE_RLIMIT_NOFILE_SOFT" : "{{data_node.rlimits.rlimit_nofile.soft}}", + "DATA_NODE_RLIMIT_NOFILE_HARD" : "{{data_node.rlimits.rlimit_nofile.hard}}", + {{#service.security.kerberos.enabled}} "SECURITY_KERBEROS_KEYTAB_SECRET": "{{service.security.kerberos.keytab_secret}}", "SECURITY_KERBEROS_ENABLED": "{{service.security.kerberos.enabled}}", diff --git a/frameworks/helloworld/src/main/dist/svc.yml b/frameworks/helloworld/src/main/dist/svc.yml index 2b3b3c0b59c..156fa67720b 100644 --- a/frameworks/helloworld/src/main/dist/svc.yml +++ b/frameworks/helloworld/src/main/dist/svc.yml @@ -6,6 +6,10 @@ pods: hello: count: {{HELLO_COUNT}} placement: '{{{HELLO_PLACEMENT}}}' + rlimits: + RLIMIT_NOFILE: + soft: {{HELLO_RLIMIT_NOFILE_SOFT}} + hard: {{HELLO_RLIMIT_NOFILE_HARD}} tasks: server: goal: RUNNING @@ -30,6 +34,10 @@ pods: count: {{WORLD_COUNT}} allow-decommission: true placement: '{{{WORLD_PLACEMENT}}}' + rlimits: + RLIMIT_NOFILE: + soft: {{WORLD_RLIMIT_NOFILE_SOFT}} + hard: {{WORLD_RLIMIT_NOFILE_HARD}} tasks: server: goal: RUNNING diff --git a/frameworks/helloworld/universe/config.json b/frameworks/helloworld/universe/config.json index 776091f1c64..f9f9985e4a6 100644 --- a/frameworks/helloworld/universe/config.json +++ b/frameworks/helloworld/universe/config.json @@ -229,6 +229,30 @@ "description": "labels", "type": "string", "default": "" + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. 
Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ @@ -314,6 +338,30 @@ "minimum": 10 } } + }, + "rlimits": { + "description" : "POSIX resource limits applied to the pod. Exercise caution when modifying these default values as it can lead to spurious task failures.", + "type": "object", + "properties": { + "rlimit_nofile": { + "description": "Specifies RLIMIT_NOFILE, a value one greater than the maximum file descriptor number that can be opened by this process.", + "type": "object", + "properties": { + "soft" : { + "type": "integer", + "description": "The soft limit is the value that the kernel enforces for the corresponding resource.", + "default": 128000, + "minimum": 128000 + }, + "hard" : { + "type": "integer", + "description": "The hard limit acts as a ceiling for the soft limit.", + "default": 128000, + "minimum": 128000 + } + } + } + } } }, "required": [ diff --git a/frameworks/helloworld/universe/marathon.json.mustache b/frameworks/helloworld/universe/marathon.json.mustache index 30647ca6c9c..eedea2d62b1 100644 --- a/frameworks/helloworld/universe/marathon.json.mustache +++ b/frameworks/helloworld/universe/marathon.json.mustache @@ -111,7 +111,13 @@ "WORLD_READINESS_CHECK_INTERVAL": "{{world.readiness_check.interval}}", "WORLD_READINESS_CHECK_DELAY": "{{world.readiness_check.delay}}", - 
"WORLD_READINESS_CHECK_TIMEOUT": "{{world.readiness_check.timeout}}" + "WORLD_READINESS_CHECK_TIMEOUT": "{{world.readiness_check.timeout}}", + + "HELLO_RLIMIT_NOFILE_SOFT": "{{hello.rlimits.rlimit_nofile.soft}}", + "HELLO_RLIMIT_NOFILE_HARD": "{{hello.rlimits.rlimit_nofile.hard}}", + + "WORLD_RLIMIT_NOFILE_SOFT": "{{world.rlimits.rlimit_nofile.soft}}", + "WORLD_RLIMIT_NOFILE_HARD": "{{world.rlimits.rlimit_nofile.hard}}" }, "fetch": [ { "uri": "{{resource.assets.uris.bootstrap-zip}}", "cache": true },