-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
131339: drtprod: yaml config for drt clusters r=sambhav-jain-16 ,vidit-bhat a=nameisbhaskar This PR has the YAML configurations for the drt-large and drt-chaos clusters. These configurations create the clusters and do the default setup, including the datadog configuration. They also create the workload clusters. The datadog setup scripts that are referred to in the configurations are also part of this PR. Fixes: #125381 Epic: None Co-authored-by: Bhaskarjyoti Bora <[email protected]>
- Loading branch information
Showing
6 changed files
with
417 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# YAML for creating and configuring the drt-chaos and workload-chaos clusters. This also configures Datadog. | ||
# Environment variables for the steps below; CLUSTER and WORKLOAD_CLUSTER are referenced as $CLUSTER and $WORKLOAD_CLUSTER. | ||
environment: | ||
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected] | ||
ROACHPROD_DNS: drt.crdb.io | ||
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io | ||
ROACHPROD_GCE_DNS_ZONE: drt | ||
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt | ||
CLUSTER: drt-chaos | ||
WORKLOAD_CLUSTER: workload-chaos | ||
|
||
targets: | ||
# Target 1: the main drt-chaos cluster. | ||
- target_name: $CLUSTER | ||
steps: | ||
# Create a 6-node GCE cluster spread over three us-east1 zones, with 4 local SSDs per node and multiple stores enabled. | ||
# The rollback action for this step destroys the cluster. | ||
- command: create | ||
args: | ||
- $CLUSTER | ||
flags: | ||
clouds: gce | ||
gce-managed: true | ||
gce-enable-multiple-stores: true | ||
gce-zones: "us-east1-d,us-east1-b,us-east1-c" | ||
nodes: 6 | ||
gce-machine-type: n2-standard-16 | ||
local-ssd: true | ||
gce-local-ssd-count: 4 | ||
username: drt | ||
lifetime: 8760h | ||
gce-image: "ubuntu-2204-jammy-v20240319" | ||
on_rollback: | ||
- command: destroy | ||
args: | ||
- $CLUSTER | ||
# Run the sync command for the gce cloud (presumably refreshes the local view of the clusters - confirm). | ||
- command: sync | ||
flags: | ||
clouds: gce | ||
# Stage "cockroach" on the cluster. | ||
- command: stage | ||
args: | ||
- $CLUSTER | ||
- cockroach | ||
# Run the Datadog setup script for the cluster; that script reads the CLUSTER environment variable. | ||
- script: "pkg/cmd/drtprod/configs/setup_datadog_cluster" | ||
# Start the cluster with the staged ./cockroach binary and the fluent sink enabled. | ||
# The rollback action for this step stops the cluster. | ||
# NOTE(review): the drt-large config passes store-count: 4 to start; this one does not even though | ||
# gce-enable-multiple-stores is set above - confirm that this is intended. | ||
- command: start | ||
args: | ||
- $CLUSTER | ||
- "--binary" | ||
- "./cockroach" | ||
flags: | ||
enable-fluent-sink: true | ||
restart: false | ||
sql-port: 26257 | ||
on_rollback: | ||
- command: stop | ||
args: | ||
- $CLUSTER | ||
# Unmask and enable cron, then install an @reboot crontab entry that runs ~/cockroach.sh after a 100-second sleep. | ||
# NOTE(review): the generated t.sh runs "crontab -l" as a separate command, so its output is not fed into | ||
# "crontab -"; only the @reboot line is installed and any existing entries are replaced - confirm intended. | ||
- command: run | ||
args: | ||
- $CLUSTER | ||
- -- | ||
- "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh" | ||
# Target 2: the workload-chaos cluster. | ||
- target_name: $WORKLOAD_CLUSTER | ||
steps: | ||
# Create a single-node GCE cluster in us-east1-c; the rollback action destroys it. | ||
- command: create | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
flags: | ||
clouds: gce | ||
gce-zones: "us-east1-c" | ||
nodes: 1 | ||
gce-machine-type: n2-standard-8 | ||
os-volume-size: 100 | ||
username: workload | ||
lifetime: 8760h | ||
on_rollback: | ||
- command: destroy | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- command: sync | ||
flags: | ||
clouds: gce | ||
# Stage "cockroach" and then "workload" on the workload cluster. | ||
- command: stage | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- cockroach | ||
- command: stage | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- workload | ||
# Run the Datadog setup script for the workload cluster. | ||
- script: "pkg/cmd/drtprod/configs/setup_datadog_workload" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# YAML for destroying the drt-chaos and workload-chaos clusters. | ||
# Environment variables for the steps below; CLUSTER and WORKLOAD_CLUSTER are referenced as $CLUSTER and $WORKLOAD_CLUSTER. | ||
environment: | ||
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected] | ||
ROACHPROD_DNS: drt.crdb.io | ||
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io | ||
ROACHPROD_GCE_DNS_ZONE: drt | ||
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt | ||
CLUSTER: drt-chaos | ||
WORKLOAD_CLUSTER: workload-chaos | ||
|
||
targets: | ||
# Destroy the main drt-chaos cluster. | ||
- target_name: $CLUSTER | ||
steps: | ||
- command: destroy | ||
args: | ||
- $CLUSTER | ||
# Destroy the workload-chaos cluster. | ||
- target_name: $WORKLOAD_CLUSTER | ||
steps: | ||
- command: destroy | ||
args: | ||
- $WORKLOAD_CLUSTER |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# YAML for creating and configuring the drt-large and workload-large clusters. This also configures Datadog. | ||
# Environment variables for the steps below; CLUSTER and WORKLOAD_CLUSTER are referenced as $CLUSTER and $WORKLOAD_CLUSTER. | ||
environment: | ||
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected] | ||
ROACHPROD_DNS: drt.crdb.io | ||
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io | ||
ROACHPROD_GCE_DNS_ZONE: drt | ||
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt | ||
CLUSTER: drt-large | ||
WORKLOAD_CLUSTER: workload-large | ||
|
||
targets: | ||
# Target 1: the main drt-large cluster. | ||
- target_name: $CLUSTER | ||
steps: | ||
# Create a 15-node GCE cluster across northamerica-northeast2, us-east5 and us-east1, with 4 local SSDs per node. | ||
# The ":N" zone suffixes are presumably per-zone node counts (they sum to 15, matching nodes) - confirm. | ||
# The rollback action for this step destroys the cluster. | ||
- command: create | ||
args: | ||
- $CLUSTER | ||
flags: | ||
clouds: gce | ||
gce-managed: true | ||
gce-enable-multiple-stores: true | ||
gce-zones: "northamerica-northeast2-a:2,northamerica-northeast2-b:2,northamerica-northeast2-c:1,us-east5-a:2,us-east5-b:2,us-east5-c:1,us-east1-b:2,us-east1-c:2,us-east1-d:1" | ||
nodes: 15 | ||
gce-machine-type: n2-standard-16 | ||
local-ssd: true | ||
gce-local-ssd-count: 4 | ||
os-volume-size: 100 | ||
username: drt | ||
lifetime: 8760h | ||
on_rollback: | ||
- command: destroy | ||
args: | ||
- $CLUSTER | ||
# Run the sync command for the gce cloud (presumably refreshes the local view of the clusters - confirm). | ||
- command: sync | ||
flags: | ||
clouds: gce | ||
# Stage "cockroach" on the cluster. | ||
- command: stage | ||
args: | ||
- $CLUSTER | ||
- cockroach | ||
# Run the Datadog setup script for the cluster; that script reads the CLUSTER environment variable. | ||
- script: "pkg/cmd/drtprod/configs/setup_datadog_cluster" | ||
# Start the cluster with the staged ./cockroach binary, 4 stores and the fluent sink enabled. | ||
# The rollback action for this step stops the cluster. | ||
- command: start | ||
args: | ||
- $CLUSTER | ||
- "--binary" | ||
- "./cockroach" | ||
flags: | ||
enable-fluent-sink: true | ||
store-count: 4 | ||
restart: false | ||
sql-port: 26257 | ||
on_rollback: | ||
- command: stop | ||
args: | ||
- $CLUSTER | ||
# Unmask and enable cron, then install an @reboot crontab entry that runs ~/cockroach.sh after a 100-second sleep. | ||
# NOTE(review): the generated t.sh runs "crontab -l" as a separate command, so its output is not fed into | ||
# "crontab -"; only the @reboot line is installed and any existing entries are replaced - confirm intended. | ||
- command: run | ||
args: | ||
- $CLUSTER | ||
- -- | ||
- "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh" | ||
# Set num_replicas and num_voters to 5 for the timeseries range, issued against $CLUSTER:1 (presumably node 1 - confirm). | ||
- command: sql | ||
args: | ||
- $CLUSTER:1 | ||
- -- | ||
- -e | ||
- "ALTER RANGE timeseries CONFIGURE ZONE USING num_replicas=5,num_voters=5" | ||
# Apply the same replica and voter counts to the default range. | ||
- command: sql | ||
args: | ||
- $CLUSTER:1 | ||
- -- | ||
- -e | ||
- "ALTER RANGE default CONFIGURE ZONE USING num_replicas=5,num_voters=5" | ||
# Target 2: the workload-large cluster. | ||
- target_name: $WORKLOAD_CLUSTER | ||
steps: | ||
# Create a 3-node GCE cluster using one zone from each of the three regions; the rollback action destroys it. | ||
- command: create | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
flags: | ||
clouds: gce | ||
gce-zones: "northamerica-northeast2-a,us-east5-a,us-east1-b" | ||
nodes: 3 | ||
gce-machine-type: n2d-standard-4 | ||
os-volume-size: 100 | ||
username: workload | ||
lifetime: 8760h | ||
on_rollback: | ||
- command: destroy | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- command: sync | ||
flags: | ||
clouds: gce | ||
# Stage "cockroach" and then "workload" on the workload cluster. | ||
- command: stage | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- cockroach | ||
- command: stage | ||
args: | ||
- $WORKLOAD_CLUSTER | ||
- workload | ||
# Run the Datadog setup script for the workload cluster. | ||
- script: "pkg/cmd/drtprod/configs/setup_datadog_workload" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# YAML for destroying the drt-large and workload-large clusters. | ||
# Environment variables for the steps below; CLUSTER and WORKLOAD_CLUSTER are referenced as $CLUSTER and $WORKLOAD_CLUSTER. | ||
environment: | ||
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected] | ||
ROACHPROD_DNS: drt.crdb.io | ||
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io | ||
ROACHPROD_GCE_DNS_ZONE: drt | ||
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt | ||
CLUSTER: drt-large | ||
WORKLOAD_CLUSTER: workload-large | ||
|
||
targets: | ||
# Destroy the main drt-large cluster. | ||
- target_name: $CLUSTER | ||
steps: | ||
- command: destroy | ||
args: | ||
- $CLUSTER | ||
# Destroy the workload-large cluster. | ||
- target_name: $WORKLOAD_CLUSTER | ||
steps: | ||
- command: destroy | ||
args: | ||
- $WORKLOAD_CLUSTER |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/bin/bash

# Sets up Datadog for the drt clusters.
#
# Steps, all executed through roachprod against $CLUSTER:
#   1. write a Fluent Bit override config that tails the audit log and ships
#      it to Datadog,
#   2. write /etc/profile.d/99-datadog.sh exporting the Datadog settings,
#   3. run `roachprod opentelemetry-start` and `roachprod fluent-bit-start`.
#
# NOTE - This uses CLUSTER environment variable, if not set the script fails

if [ -z "${CLUSTER}" ]; then
  echo "environment CLUSTER is not set" >&2
  exit 1
fi

# TODO - this command does not work. We need to replace this with the actual dd_api_key for the script to work

dd_api_key="$(gcloud --project=cockroach-drt secrets versions access latest --secret datadog-api-key)"

if [ -z "${dd_api_key}" ]; then
  echo "Missing Datadog API key!" >&2
  exit 1
fi

dd_site="us5.datadoghq.com"

# CLUSTER with any trailing ":<suffix>" removed; used for the "cluster:" tag.
# Both tag lists below must use the same value.
cluster_name="${CLUSTER%:*}"

# The command is a double-quoted string, so the ${...} references are expanded
# locally before it is sent. The heredoc body is YAML, so its indentation is
# significant.
roachprod ssh "$CLUSTER" -- "sudo mkdir -p /etc/fluent-bit && sudo tee /etc/fluent-bit/config-override.yaml > /dev/null << EOF
---
pipeline:
  inputs:
    - name: tail
      path: /var/log/audit/audit.log
      tag: audit
      key: message
      storage.type: filesystem
      alias: audit
  outputs:
    - name: datadog
      match: audit
      host: http-intake.logs.${dd_site}
      tls: on
      compress: gzip
      apikey: ${dd_api_key}
      dd_source: audit
      dd_service: drt-cockroachdb
      dd_tags: env:development,cluster:${cluster_name},service:drt-cockroachdb,team:drt
      alias: audit
      storage.total_limit_size: 25MB
EOF"

roachprod ssh "$CLUSTER" -- "sudo tee /etc/profile.d/99-datadog.sh > /dev/null << EOF
export DD_SITE=${dd_site}
export DD_API_KEY=${dd_api_key}
export DD_TAGS=env:development,cluster:${cluster_name},team:drt,service:drt-cockroachdb
EOF"

roachprod opentelemetry-start "$CLUSTER" \
  --datadog-api-key "${dd_api_key}" \
  --datadog-tags 'service:drt-cockroachdb,team:drt'

roachprod fluent-bit-start "$CLUSTER" \
  --datadog-api-key "${dd_api_key}" \
  --datadog-service drt-cockroachdb \
  --datadog-tags 'service:drt-cockroachdb,team:drt'

echo
echo "Updated $CLUSTER configuration to send telemetry data to Datadog."
echo
echo "If this was the first time this script was run against $CLUSTER then"
echo "CockroachDB must be restarted to reload its logging configuration."
echo

exit 0
Oops, something went wrong.