Skip to content

Commit f24c96f

Browse files
Merge pull request #120 from uptick/dev-1044/fix-oom-and-rollbacks
DEV-1044 fix(server): fix OOM and behave better when rollbacks are required
2 parents df864b2 + 762cdb3 commit f24c96f

File tree

2 files changed

+52
-26
lines changed

2 files changed

+52
-26
lines changed

charts/gitops/templates/deployment.yml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,6 @@ spec:
4040
resources:
4141
requests:
4242
memory: "750Mi"
43-
limits:
44-
memory: "1500Mi"
4543

4644
envFrom:
4745
- configMapRef:

gitops_server/workers/deployer/deploy.py

Lines changed: 52 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from gitops.common.app import App
1313
from gitops_server import settings
14-
from gitops_server.types import AppDefinitions, UpdateAppResult
14+
from gitops_server.types import AppDefinitions, RunOutput, UpdateAppResult
1515
from gitops_server.utils import get_repo_name_from_url, github, run, slack
1616
from gitops_server.utils.git import temp_repo
1717

@@ -133,6 +133,17 @@ async def uninstall_app(self, app: App) -> UpdateAppResult:
133133
)
134134
return update_result
135135

136+
async def rollback_deployment(self, app: App) -> None:
137+
with tracer.start_as_current_span("rollback_deployment", attributes={"app": app.name}):
138+
logger.warning(
139+
"Rolling back %s deployment due to previous failed helm install",
140+
app.name,
141+
)
142+
await run(
143+
f"helm rollback --namespace={app.namespace} {app.name}",
144+
suppress_errors=True,
145+
)
146+
136147
async def update_app_deployment(self, app: App) -> UpdateAppResult | None:
137148
app.set_value("deployment.labels.gitops/deploy_id", self.deploy_id)
138149
app.set_value("deployment.labels.gitops/status", github.STATUSES.in_progress)
@@ -155,6 +166,40 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None:
155166
os.fsync(cfg.fileno())
156167

157168
with tracer.start_as_current_span("helm_upgrade"):
169+
170+
async def upgrade_helm_git() -> RunOutput:
171+
result = await run(
172+
"helm secrets upgrade --create-namespace"
173+
f" --history-max {MAX_HELM_HISTORY}"
174+
" --install"
175+
" --timeout=600s"
176+
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
177+
f" -f {cfg.name}"
178+
f" --namespace={app.namespace}"
179+
f" {app.name}"
180+
f" {chart_folder_path}",
181+
suppress_errors=True,
182+
)
183+
return result
184+
185+
result = await upgrade_helm_git()
186+
if result["exit_code"] != 0 and "is in progress" in result["output"]:
187+
await self.rollback_deployment(app)
188+
result = await upgrade_helm_git()
189+
190+
elif app.chart.type == "helm":
191+
span.set_attribute("gitops.chart.type", "helm")
192+
with tempfile.NamedTemporaryFile(suffix=".yml") as cfg:
193+
cfg.write(json.dumps(app.values).encode())
194+
cfg.flush()
195+
os.fsync(cfg.fileno())
196+
chart_version_arguments = f" --version={app.chart.version}" if app.chart.version else ""
197+
with tracer.start_as_current_span("helm_repo_add"):
198+
await run(f"helm repo add {app.chart.helm_repo} {app.chart.helm_repo_url}")
199+
200+
with tracer.start_as_current_span("helm_upgrade"):
201+
202+
async def upgrade_helm_chart() -> RunOutput:
158203
result = await run(
159204
"helm secrets upgrade --create-namespace"
160205
f" --history-max {MAX_HELM_HISTORY}"
@@ -164,32 +209,15 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None:
164209
f" -f {cfg.name}"
165210
f" --namespace={app.namespace}"
166211
f" {app.name}"
167-
f" {chart_folder_path}",
212+
f" {app.chart.helm_chart} {chart_version_arguments}",
168213
suppress_errors=True,
169214
)
170-
elif app.chart.type == "helm":
171-
span.set_attribute("gitops.chart.type", "helm")
172-
with tempfile.NamedTemporaryFile(suffix=".yml") as cfg:
173-
cfg.write(json.dumps(app.values).encode())
174-
cfg.flush()
175-
os.fsync(cfg.fileno())
176-
chart_version_arguments = f" --version={app.chart.version}" if app.chart.version else ""
177-
with tracer.start_as_current_span("helm_repo_add"):
178-
await run(f"helm repo add {app.chart.helm_repo} {app.chart.helm_repo_url}")
215+
return result
179216

180-
with tracer.start_as_current_span("helm_upgrade"):
181-
result = await run(
182-
"helm secrets upgrade --create-namespace"
183-
f" --history-max {MAX_HELM_HISTORY}"
184-
" --install"
185-
" --timeout=600s"
186-
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
187-
f" -f {cfg.name}"
188-
f" --namespace={app.namespace}"
189-
f" {app.name}"
190-
f" {app.chart.helm_chart} {chart_version_arguments}",
191-
suppress_errors=True,
192-
)
217+
result = await upgrade_helm_chart()
218+
if result["exit_code"] != 0 and "is in progress" in result["output"]:
219+
await self.rollback_deployment(app)
220+
result = await upgrade_helm_chart()
193221
else:
194222
logger.warning("Local is not implemented yet")
195223
return None

0 commit comments

Comments
 (0)