Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubernetes integration and graceful shutdown #960

Merged
merged 18 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 61 additions & 32 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,53 +10,65 @@
import asyncio
import logging
import os
import platform
import signal
import sys
import time
from datetime import datetime
from functools import wraps

import humanize
from docopt import docopt
from prometheus_client import start_http_server

import server
from server import info
from server.config import config
from server.control import ControlServer
from server.game_service import GameService
from server.health import HealthServer
from server.ice_servers.nts import TwilioNTS
from server.player_service import PlayerService
from server.profiler import Profiler
from server.protocol import QDataStreamProtocol, SimpleJsonProtocol


def log_signal(func):
    """
    Decorate a signal handler so every received signal is logged first.

    The returned handler takes the usual `(sig, frame)` arguments, logs the
    signal at INFO level, then delegates to `func` and returns its result.
    """
    @wraps(func)
    def logging_handler(sig, frame):
        # Convert the raw signal number to its enum member for a readable log
        received = signal.Signals(sig)
        logger.info("Received signal %s", received)
        return func(sig, frame)

    return logging_handler


async def main():
global startup_time, shutdown_time

version = os.environ.get("VERSION") or "dev"
python_version = platform.python_version()

logger.info(
"Lobby %s (Python %s) on %s",
version,
python_version,
sys.platform
"Lobby %s (Python %s) on %s named %s",
info.VERSION,
info.PYTHON_VERSION,
sys.platform,
info.CONTAINER_NAME,
)

if config.ENABLE_METRICS:
logger.info("Using prometheus on port: %i", config.METRICS_PORT)
start_http_server(config.METRICS_PORT)

loop = asyncio.get_running_loop()
done = loop.create_future()

logger.info("Event loop: %s", loop)

def signal_handler(sig: int, _frame):
logger.info(
"Received signal %s, shutting down",
signal.Signals(sig)
)
@log_signal
def done_handler(sig: int, frame):
if not done.done():
done.set_result(0)

# Make sure we can shutdown gracefully
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, done_handler)
signal.signal(signal.SIGINT, done_handler)

database = server.db.FAFDatabase(
host=config.DB_SERVER,
Expand Down Expand Up @@ -91,19 +103,21 @@ def signal_handler(sig: int, _frame):
config.register_callback("PROFILING_DURATION", profiler.refresh)
config.register_callback("PROFILING_INTERVAL", profiler.refresh)

await instance.start_services()

ctrl_server = await server.run_control_server(player_service, game_service)
health_server = HealthServer(instance)
await health_server.run_from_config()
config.register_callback(
"HEALTH_SERVER_PORT",
health_server.run_from_config
)

async def restart_control_server():
nonlocal ctrl_server
control_server = ControlServer(instance)
await control_server.run_from_config()
config.register_callback(
"CONTROL_SERVER_PORT",
control_server.run_from_config
)

await ctrl_server.shutdown()
ctrl_server = await server.run_control_server(
player_service,
game_service
)
config.register_callback("CONTROL_SERVER_PORT", restart_control_server)
await instance.start_services()

PROTO_CLASSES = {
QDataStreamProtocol.__name__: QDataStreamProtocol,
Expand Down Expand Up @@ -135,8 +149,8 @@ async def restart_control_server():
)

server.metrics.info.info({
"version": version,
"python_version": python_version,
"version": info.VERSION,
"python_version": info.PYTHON_VERSION,
"start_time": datetime.utcnow().strftime("%m-%d %H:%M"),
"game_uid": str(game_service.game_id_counter)
})
Expand All @@ -150,12 +164,27 @@ async def restart_control_server():
shutdown_time = time.perf_counter()

# Cleanup
await instance.shutdown()
await ctrl_server.shutdown()
await instance.graceful_shutdown()

drain_task = asyncio.create_task(instance.drain())

# Close DB connections
@log_signal
def drain_handler(sig: int, frame):
if not drain_task.done():
drain_task.cancel()

# Allow us to force shut down by skipping the drain
signal.signal(signal.SIGTERM, drain_handler)
signal.signal(signal.SIGINT, drain_handler)

await drain_task
await instance.shutdown()
await control_server.shutdown()
await database.close()

# Health server should be the last thing to shut down
await health_server.shutdown()

return exit_code


Expand Down Expand Up @@ -191,7 +220,7 @@ async def restart_control_server():
stop_time = time.perf_counter()
logger.info(
"Total server uptime: %s",
humanize.naturaldelta(stop_time - startup_time)
humanize.precisedelta(stop_time - startup_time)
)

if shutdown_time is not None:
Expand Down
83 changes: 83 additions & 0 deletions minikube-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# NodePort Service that routes to pods labelled `app: faf-lobby` and exposes
# the two lobby protocol ports.
apiVersion: v1
kind: Service
metadata:
  name: faf-lobby
  labels:
    app: faf-lobby
spec:
  type: NodePort
  selector:
    app: faf-lobby
  ports:
    - port: 8001
      name: qstream
    - port: 8002
      name: simplejson
---
# Single-replica Deployment of the lobby server for local minikube testing.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: faf-lobby
spec:
  replicas: 1
  selector:
    matchLabels:
      app: faf-lobby
  template:
    metadata:
      labels:
        app: faf-lobby
    spec:
      # NOTE(review): 10s longer than SHUTDOWN_GRACE_PERIOD (300) in the
      # ConfigMap below, presumably so the server can finish its own graceful
      # shutdown before being killed - confirm.
      terminationGracePeriodSeconds: 310
      containers:
        - name: faf-python-server
          image: faf-python-server:graceful
          # Never pull: the image must already exist in the cluster's
          # container runtime.
          imagePullPolicy: Never
          readinessProbe:
            httpGet:
              path: /ready
              port: health
            initialDelaySeconds: 4
            periodSeconds: 1
          ports:
            - containerPort: 4000
              name: control
            - containerPort: 2000
              name: health
            - containerPort: 8001
              name: qstream
            - containerPort: 8002
              name: simplejson
          env:
            - name: CONFIGURATION_FILE
              value: /config/config.yaml
            # Expose the pod name to the server via the downward API
            - name: CONTAINER_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: config
              mountPath: /config
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: minikube-dev-config
            items:
              - key: config.yaml
                path: config.yaml
---
# Server configuration file; the Deployment above mounts it into the lobby
# container at /config/config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: minikube-dev-config
data:
  config.yaml: |
    LOG_LEVEL: TRACE
    USE_POLICY_SERVER: false
    QUEUE_POP_TIME_MAX: 30
    SHUTDOWN_GRACE_PERIOD: 300
    SHUTDOWN_KICK_IDLE_PLAYERS: true

    DB_SERVER: host.minikube.internal
    MQ_SERVER: host.minikube.internal
Loading