Skip to content

Commit

Permalink
Merge pull request #77 from uptick/feat/add-instrumnetation
Browse files Browse the repository at this point in the history
DEV-834 feat: add instrumnetation to gitops
  • Loading branch information
uptickmetachu authored Sep 10, 2024
2 parents 08769ed + 6998cf4 commit 6ec69c3
Show file tree
Hide file tree
Showing 7 changed files with 648 additions and 101 deletions.
15 changes: 15 additions & 0 deletions .mise.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ uv = { version = "0.4.0"}
[env]
_.python.venv = { path = "{{config_root}}/.venv", create = true }

[tasks.start]
run = "uvicorn --host 0.0.0.0 --port 8000 gitops_server.main:app --reload"

[tasks."install"]
run = ["uv sync --all-extras"]
Expand All @@ -30,3 +32,16 @@ run = "helm install --dry-run --debug -f charts/gitops/values.yaml debug charts/
[tasks.release]
description = "Bump release versions across all files"
run = "python release.py"

[tasks.start_otel]
run = ["""
OTEL_PYTHON_LOG_CORRELATION=true \
OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=True \
OTEL_LOGS_EXPORTER=none \
OTEL_METRICS_EXPORTER=none \
OTEL_TRACES_EXPORTER=console \
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
OTEL_RESOURCE_ATTRIBUTES=server_name=gitops \
OTEL_SERVICE_NAME=gitops-test \
OTEL_EXPORTER_OTLP_TIMEOUT=1 \
uvicorn --host 0.0.0.0 --port 7000 gitops_server.main:app"""]
12 changes: 10 additions & 2 deletions gitops_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,23 @@
import hashlib
import hmac
import logging
import logging.config

from fastapi import HTTPException, Request
from uptick_observability.fastapi import manually_instrument_fastapi # type: ignore[import-untyped]
from uptick_observability.logging import ( # type: ignore[import-untyped]
DEFAULT_LOGGING_CONFIG_DICT,
manually_instrument_logging,
)

from gitops_server import settings
from gitops_server.app import app
from gitops_server.workers import DeploymentStatusWorker, DeployQueueWorker

logging.basicConfig(level=logging.INFO)
manually_instrument_logging()
manually_instrument_fastapi()

logging.config.dictConfig(DEFAULT_LOGGING_CONFIG_DICT)
logger = logging.getLogger("gitops")


Expand All @@ -20,7 +29,6 @@ def filter(self, record: logging.LogRecord) -> bool:

# Filter out / from access logs (We don't care about these calls)
logging.getLogger("uvicorn.access").addFilter(EndpointFilter())
logger = logging.getLogger("document-wrapper")


@app.get("/")
Expand Down
19 changes: 13 additions & 6 deletions gitops_server/utils/git.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import logging
import os
import tempfile
from contextlib import asynccontextmanager
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager

from opentelemetry.trace import get_tracer

from . import run

tracer = get_tracer(__name__)

BASE_REPO_DIR = "/var/gitops/repos"

logger = logging.getLogger("gitops")
Expand All @@ -17,14 +21,17 @@ async def clone_repo(git_repo_url: str, path: str, sha: str | None = None):

url_with_oauth_token = git_repo_url.replace("://", f"://{os.environ['GITHUB_OAUTH_TOKEN'].strip()}@")

await run(f"git clone {url_with_oauth_token} {path}; cd {path}; git checkout {sha}")
with tracer.start_as_current_span("tempo_repo.clone_repo"):
await run(f"git clone {url_with_oauth_token} {path}; cd {path}; git checkout {sha}")

await run(f'cd {path}; git-crypt unlock {os.environ["GIT_CRYPT_KEY_FILE"]}')
with tracer.start_as_current_span("temp_repo.git_crypt_unlock"):
await run(f'cd {path}; git-crypt unlock {os.environ["GIT_CRYPT_KEY_FILE"]}')


@asynccontextmanager
async def temp_repo(git_repo_url: str, sha: str | None = None) -> AsyncGenerator[str, None]:
"""Checks out a git_repo_url to a temporary folder location. Returns temporary folder location"""
with tempfile.TemporaryDirectory() as temporary_folder_path:
await clone_repo(git_repo_url, path=temporary_folder_path, sha=sha)
yield temporary_folder_path
with tracer.start_as_current_span("checkout_temp_repo"):
with tempfile.TemporaryDirectory() as temporary_folder_path:
await clone_repo(git_repo_url, path=temporary_folder_path, sha=sha)
yield temporary_folder_path
190 changes: 105 additions & 85 deletions gitops_server/workers/deployer/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import tempfile
import uuid

from opentelemetry import trace

from gitops.common.app import App
from gitops_server import settings
from gitops_server.types import AppDefinitions, UpdateAppResult
Expand All @@ -13,12 +15,15 @@

from .hooks import handle_failed_deploy, handle_successful_deploy

tracer = trace.get_tracer(__name__)

BASE_REPO_DIR = "/var/gitops/repos"
ROLE_ARN = f"arn:aws:iam::{settings.ACCOUNT_ID}:role/GitopsAccess"
logger = logging.getLogger("gitops")
GITOPS_MAX_PARALLEL_DEPLOYS = os.environ.get("GITOPS_MAX_PARALLEL_DEPLOYS", "5")


@tracer.start_as_current_span("post_init_summary")
async def post_init_summary(source, username, added_apps, updated_apps, removed_apps, commit_message):
deltas = ""
for typ, d in [("Adding", added_apps), ("Updating", updated_apps), ("Removing", removed_apps)]:
Expand All @@ -31,6 +36,7 @@ async def post_init_summary(source, username, added_apps, updated_apps, removed_
)


@tracer.start_as_current_span("post_result")
async def post_result(app: App, result: UpdateAppResult, deployer: "Deployer", **kwargs):
if result["exit_code"] != 0:
deploy_result = await handle_failed_deploy(app, result, deployer)
Expand All @@ -45,6 +51,7 @@ async def post_result(app: App, result: UpdateAppResult, deployer: "Deployer", *
await handle_successful_deploy(app, result, deployer)


@tracer.start_as_current_span("post_result_summary")
async def post_result_summary(source: str, results: list[UpdateAppResult]):
n_success = sum([r["exit_code"] == 0 for r in results])
n_failed = sum([r["exit_code"] != 0 for r in results])
Expand All @@ -55,6 +62,7 @@ async def post_result_summary(source: str, results: list[UpdateAppResult]):
)


@tracer.start_as_current_span("load_app_definitions")
async def load_app_definitions(url: str, sha: str) -> AppDefinitions:
logger.info(f'Loading app definitions at "{sha}".')
async with temp_repo(url, sha=sha) as repo:
Expand Down Expand Up @@ -107,99 +115,111 @@ async def from_push_event(cls, push_event):
)

async def deploy(self):
added_apps, updated_apps, removed_apps = self.calculate_app_deltas()
if not (added_apps | updated_apps | removed_apps):
logger.info("No deltas; aborting.")
return
logger.info(
f"Running deployment for these deltas: A{list(added_apps)}, U{list(updated_apps)},"
f" R{list(removed_apps)}"
)
await post_init_summary(
source=self.current_app_definitions.name,
username=self.author_name,
added_apps=added_apps,
updated_apps=updated_apps,
removed_apps=removed_apps,
commit_message=self.commit_message,
)
update_results = await asyncio.gather(
*[
self.update_app_deployment(self.current_app_definitions.apps[app_name])
for app_name in (added_apps | updated_apps)
]
)
uninstall_results = await asyncio.gather(
*[self.uninstall_app(self.previous_app_definitions.apps[app_name]) for app_name in removed_apps]
)
await post_result_summary(self.current_app_definitions.name, update_results + uninstall_results)
with tracer.start_as_current_span("deploy"):
added_apps, updated_apps, removed_apps = self.calculate_app_deltas()
if not (added_apps | updated_apps | removed_apps):
logger.info("No deltas; aborting.")
return
logger.info(
f"Running deployment for these deltas: A{list(added_apps)}, U{list(updated_apps)},"
f" R{list(removed_apps)}"
)
await post_init_summary(
source=self.current_app_definitions.name,
username=self.author_name,
added_apps=added_apps,
updated_apps=updated_apps,
removed_apps=removed_apps,
commit_message=self.commit_message,
)
update_results = await asyncio.gather(
*[
self.update_app_deployment(self.current_app_definitions.apps[app_name])
for app_name in (added_apps | updated_apps)
]
)
uninstall_results = await asyncio.gather(
*[self.uninstall_app(self.previous_app_definitions.apps[app_name]) for app_name in removed_apps]
)
await post_result_summary(self.current_app_definitions.name, update_results + uninstall_results)

async def uninstall_app(self, app: App) -> UpdateAppResult:
async with self.semaphore:
logger.info(f"Uninstalling app {app.name!r}.")
result = await run(f"helm uninstall {app.name} -n {app.namespace}", suppress_errors=True)
if result:
update_result = UpdateAppResult(app_name=app.name, slack_message="", **result)
await post_result(
app=app,
result=update_result,
deployer=self,
)
return update_result
with tracer.start_as_current_span("uninstall_app", attributes={"app": app.name}):
async with self.semaphore:
logger.info(f"Uninstalling app {app.name!r}.")
result = await run(f"helm uninstall {app.name} -n {app.namespace}", suppress_errors=True)
if result:
update_result = UpdateAppResult(app_name=app.name, slack_message="", **result)
await post_result(
app=app,
result=update_result,
deployer=self,
)
return update_result

async def update_app_deployment(self, app: App) -> UpdateAppResult | None:
app.set_value("deployment.labels.gitops/deploy_id", self.deploy_id)
app.set_value("deployment.labels.gitops/status", github.STATUSES.in_progress)
if github_deployment_url := app.values.get("github/deployment_url"):
app.set_value("deployment.annotations.github/deployment_url", github_deployment_url)

async with self.semaphore:
logger.info(f"Deploying app {app.name!r}.")
if app.chart.type == "git":
assert app.chart.git_repo_url
async with temp_repo(app.chart.git_repo_url, sha=app.chart.git_sha) as chart_folder_path:
await run(f"cd {chart_folder_path}; helm dependency build")
with tracer.start_as_current_span("update_app_deployment", attributes={"app": app.name}) as span:
app.set_value("deployment.labels.gitops/deploy_id", self.deploy_id)
app.set_value("deployment.labels.gitops/status", github.STATUSES.in_progress)
if github_deployment_url := app.values.get("github/deployment_url"):
app.set_value("deployment.annotations.github/deployment_url", github_deployment_url)

async with self.semaphore:
logger.info(f"Deploying app {app.name!r}.")
if app.chart.type == "git":
span.set_attribute("gitops.chart.type", "git")
assert app.chart.git_repo_url
async with temp_repo(app.chart.git_repo_url, sha=app.chart.git_sha) as chart_folder_path:
with tracer.start_as_current_span("helm_dependency_build"):
await run(f"cd {chart_folder_path}; helm dependency build")

with tempfile.NamedTemporaryFile(suffix=".yml") as cfg:
cfg.write(json.dumps(app.values).encode())
cfg.flush()
os.fsync(cfg.fileno())

with tracer.start_as_current_span("helm_upgrade"):
result = await run(
"helm secrets upgrade --create-namespace"
" --install"
" --timeout=600s"
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
f" -f {cfg.name}"
f" --namespace={app.namespace}"
f" {app.name}"
f" {chart_folder_path}",
suppress_errors=True,
)
elif app.chart.type == "helm":
span.set_attribute("gitops.chart.type", "helm")
with tempfile.NamedTemporaryFile(suffix=".yml") as cfg:
cfg.write(json.dumps(app.values).encode())
cfg.flush()
os.fsync(cfg.fileno())
result = await run(
"helm secrets upgrade --create-namespace"
" --install"
" --timeout=600s"
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
f" -f {cfg.name}"
f" --namespace={app.namespace}"
f" {app.name}"
f" {chart_folder_path}",
suppress_errors=True,
)
elif app.chart.type == "helm":
with tempfile.NamedTemporaryFile(suffix=".yml") as cfg:
cfg.write(json.dumps(app.values).encode())
cfg.flush()
os.fsync(cfg.fileno())
chart_version_arguments = f" --version={app.chart.version}" if app.chart.version else ""
await run(f"helm repo add {app.chart.helm_repo} {app.chart.helm_repo_url}")
result = await run(
"helm secrets upgrade --create-namespace"
" --install"
" --timeout=600s"
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
f" -f {cfg.name}"
f" --namespace={app.namespace}"
f" {app.name}"
f" {app.chart.helm_chart} {chart_version_arguments}",
suppress_errors=True,
)
else:
logger.warning("Local is not implemented yet")
return None

update_result = UpdateAppResult(app_name=app.name, slack_message="", **result)

await post_result(app=app, result=update_result, deployer=self)
return update_result
chart_version_arguments = f" --version={app.chart.version}" if app.chart.version else ""
with tracer.start_as_current_span("helm_repo_add"):
await run(f"helm repo add {app.chart.helm_repo} {app.chart.helm_repo_url}")

with tracer.start_as_current_span("helm_upgrade"):
result = await run(
"helm secrets upgrade --create-namespace"
" --install"
" --timeout=600s"
f"{' --set skip_migrations=true' if self.skip_migrations else ''}"
f" -f {cfg.name}"
f" --namespace={app.namespace}"
f" {app.name}"
f" {app.chart.helm_chart} {chart_version_arguments}",
suppress_errors=True,
)
else:
logger.warning("Local is not implemented yet")
return None

update_result = UpdateAppResult(app_name=app.name, slack_message="", **result)

await post_result(app=app, result=update_result, deployer=self)
return update_result

def calculate_app_deltas(self):
cur = self.current_app_definitions.apps.keys()
Expand Down
3 changes: 3 additions & 0 deletions gitops_server/workers/deployer/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import os

import httpx
from opentelemetry import trace

from gitops.common.app import App
from gitops_server import settings
from gitops_server.types import UpdateAppResult
from gitops_server.utils import github
from gitops_server.utils.slack import SlackGroup, SlackUser, find_commiter_slack_user

tracer = trace.get_tracer(__name__)

logger = logging.getLogger(__name__)


Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies = [
"pyyaml>=6.0.2",
"dsnparse==0.2.1",
"colorama>=0.4.4",
"boto3==1.34.34",
"boto3==1.35.4",
"humanize>=3.5.0",
"tabulate>=0.8.9",
"packaging>=24.1",
Expand All @@ -27,6 +27,7 @@ server = [
"fastapi==0.109.2",
"kubernetes_asyncio>=25.1.2",
"sentry-sdk>=1.3.0",
"uptick-observability[fastapi]>=0.2.2",
]

[build-system]
Expand Down
Loading

0 comments on commit 6ec69c3

Please sign in to comment.