Skip to content

Commit 4314cf9

Browse files
authored
Add GPU to VM installation as an option (#201)
* IaC update for GPU in VM mode
1 parent 0a5b0ef commit 4314cf9

File tree

11 files changed

+182
-37
lines changed

11 files changed

+182
-37
lines changed

.github/workflows/documentation.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ on:
99
paths:
1010
- "docs/**"
1111
- "helm/**"
12+
- ".github/workflows/documentation.yml"
1213

1314
# Allows running this workflow manually
1415
workflow_dispatch:

.github/workflows/opentofu.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Workflow for validating Infrastructure as Code
2+
name: Validate Infrastructure as Code
3+
4+
on:
5+
pull_request:
6+
types:
7+
- opened
8+
- synchronize
9+
- reopened
10+
- ready_for_review
11+
# Only run when the OpenTofu code or this workflow file changes
12+
paths:
13+
- "opentofu/**"
14+
- ".github/workflows/opentofu.yml"
15+
16+
# Allows running this workflow manually
17+
workflow_dispatch:
18+
19+
jobs:
20+
check:
21+
if: github.event.pull_request.draft == false
22+
runs-on: ubuntu-latest
23+
container:
24+
image: hashicorp/terraform:latest
25+
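# Grant the job token write access to pull requests (blocking merges on failure also requires this check to be marked as required in branch protection)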
26+
permissions:
27+
pull-requests: write
28+
29+
steps:
30+
- name: Checkout Code
31+
uses: actions/checkout@v4
32+
33+
- name: Initialize Infrastructure as Code
34+
working-directory: ./opentofu
35+
run: terraform init -backend=false
36+
37+
- name: Validate Infrastructure as Code
38+
working-directory: ./opentofu
39+
run: terraform validate
40+
41+
- name: Validate Infrastructure as Code formatting
42+
working-directory: ./opentofu
43+
run: terraform fmt -recursive -check

.github/workflows/pytest.yml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@ jobs:
2020
docker:
2121
image: docker:latest
2222
options: --privileged
23+
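# Grant the job token write access to pull requests (blocking merges on failure also requires this check to be marked as required in branch protection)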
24+
permissions:
25+
pull-requests: write
2326

2427
steps:
25-
- uses: actions/checkout@v4
26-
with:
27-
fetch-depth: 2
28+
- name: Checkout Code
29+
uses: actions/checkout@v4
2830

2931
- uses: docker/setup-buildx-action@v3
3032
with:
@@ -53,7 +55,3 @@ jobs:
5355
5456
- name: Run All Tests
5557
run: pytest
56-
57-
# Block merging if the job fails
58-
permissions:
59-
pull-requests: write

.github/workflows/releases.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
exit 1
2727
fi
2828
29-
- name: Checkout repository
29+
- name: Checkout Code
3030
uses: actions/checkout@v4
3131

3232
- name: Build and Push Infrastructure as Code

opentofu/main.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,11 @@ module "vm" {
9292
adb_password = local.adb_password
9393
streamlit_client_port = local.streamlit_client_port
9494
fastapi_server_port = local.fastapi_server_port
95+
vm_is_gpu_shape = var.vm_is_gpu_shape
9596
compute_os_ver = var.compute_os_ver
9697
compute_cpu_ocpu = var.compute_cpu_ocpu
9798
compute_cpu_shape = var.compute_cpu_shape
99+
compute_gpu_shape = var.compute_gpu_shape
98100
availability_domains = local.availability_domains
99101
private_subnet_id = module.network.private_subnet_ocid
100102
providers = {

opentofu/modules/vm/data.tf

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
data "oci_core_images" "images" {
88
compartment_id = var.compartment_id
99
operating_system = "Oracle Linux"
10-
shape = var.compute_cpu_shape
10+
shape = local.vm_compute_shape
1111

1212
filter {
13-
name = "display_name"
14-
values = ["Oracle-Linux-${var.compute_os_ver}-.*"]
15-
regex = true
13+
name = "display_name"
14+
values = [
15+
var.vm_is_gpu_shape ? "Oracle-Linux-${var.compute_os_ver}-.*(GPU|NVIDIA|A10).*" : "Oracle-Linux-${var.compute_os_ver}-.*"
16+
]
17+
regex = true
1618
}
1719

1820
sort_by = "TIMECREATED"
@@ -29,4 +31,38 @@ data "oci_core_services" "core_services" {
2931
values = ["All .* Services In Oracle Services Network"]
3032
regex = true
3133
}
34+
}
35+
36+
data "cloudinit_config" "workers" {
37+
gzip = true
38+
base64_encode = true
39+
40+
# Expand root filesystem to fill available space on volume
41+
part {
42+
content_type = "text/cloud-config"
43+
content = jsonencode({
44+
# https://cloudinit.readthedocs.io/en/latest/reference/modules.html#growpart
45+
growpart = {
46+
mode = "auto"
47+
devices = ["/"]
48+
ignore_growroot_disabled = false
49+
}
50+
51+
# https://cloudinit.readthedocs.io/en/latest/reference/modules.html#resizefs
52+
resize_rootfs = true
53+
54+
# Grow the LVM root logical volume when the oci-growfs utility is present
55+
bootcmd = ["if [[ -f /usr/libexec/oci-growfs ]]; then /usr/libexec/oci-growfs -y; fi"]
56+
})
57+
filename = "10-growpart.yml"
58+
merge_type = "list(append)+dict(no_replace,recurse_list)+str(append)"
59+
}
60+
61+
# Startup Initialisation
62+
part {
63+
content_type = "text/x-shellscript"
64+
content = local.cloud_init
65+
filename = "50-custom-init.sh"
66+
merge_type = "list(append)+dict(no_replace,recurse_list)+str(append)"
67+
}
3268
}

opentofu/modules/vm/locals.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,6 @@ locals {
1010
db_name = var.adb_name
1111
db_password = var.adb_password
1212
})
13+
14+
vm_compute_shape = var.vm_is_gpu_shape ? var.compute_gpu_shape : var.compute_cpu_shape
1315
}

opentofu/modules/vm/main.tf

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,18 @@ resource "oci_core_instance" "instance" {
6060
compartment_id = var.compartment_id
6161
display_name = format("%s-compute", var.label_prefix)
6262
availability_domain = var.availability_domains[0]
63-
shape = var.compute_cpu_shape
64-
shape_config {
65-
memory_in_gbs = var.compute_cpu_ocpu * 16
66-
ocpus = var.compute_cpu_ocpu
63+
shape = local.vm_compute_shape
64+
dynamic "shape_config" {
65+
for_each = var.vm_is_gpu_shape ? [] : [1]
66+
content {
67+
memory_in_gbs = var.compute_cpu_ocpu * 16
68+
ocpus = var.compute_cpu_ocpu
69+
}
6770
}
6871
source_details {
6972
source_type = "image"
7073
source_id = data.oci_core_images.images.images[0].id
71-
boot_volume_size_in_gbs = 50
74+
boot_volume_size_in_gbs = 100
7275
}
7376
agent_config {
7477
are_all_plugins_disabled = false
@@ -85,7 +88,7 @@ resource "oci_core_instance" "instance" {
8588
nsg_ids = [oci_core_network_security_group.compute.id]
8689
}
8790
metadata = {
88-
user_data = "${base64encode(local.cloud_init)}"
91+
user_data = data.cloudinit_config.workers.rendered
8992
}
9093
lifecycle {
9194
create_before_destroy = true

opentofu/modules/vm/variables.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ variable "vcn_id" {
3232
variable "private_subnet_id" {
3333
type = string
3434
}
35+
36+
variable "vm_is_gpu_shape" {
37+
type = bool
38+
}
39+
3540
variable "compute_os_ver" {
3641
type = string
3742
}
@@ -44,6 +49,10 @@ variable "compute_cpu_ocpu" {
4449
type = number
4550
}
4651

52+
variable "compute_gpu_shape" {
53+
type = string
54+
}
55+
4756
variable "adb_name" {
4857
type = string
4958
}

0 commit comments

Comments
 (0)