Skip to content
This repository has been archived by the owner on Aug 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #386 from edx/dan-f/improve-health-timeouts
Browse files Browse the repository at this point in the history
Update timeout and logging in health check
  • Loading branch information
dan-f committed Nov 18, 2015
2 parents 9cf44f9 + 2800664 commit fc9471c
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 46 deletions.
93 changes: 58 additions & 35 deletions analytics_dashboard/core/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from calendar import timegm
import json
import logging
import datetime
from testfixtures import LogCapture

from django.core.cache import cache
from django.test.utils import override_settings
Expand All @@ -13,11 +15,12 @@
from django_dynamic_fixture import G
from django.core.urlresolvers import reverse, reverse_lazy
from django.test import TestCase
from analyticsclient.exceptions import ClientError
from analyticsclient.exceptions import TimeoutError
from social.exceptions import AuthException
from social.utils import parse_qs

from auth_backends.backends import EdXOpenIdConnect
from core.views import OK, UNAVAILABLE
from courses.permissions import set_user_course_permissions, user_can_view_course, get_user_course_permissions


Expand Down Expand Up @@ -63,15 +66,16 @@ def assertRedirectsNoFollow(self, response, expected_url, status_code=302, **que


class ViewTests(TestCase):
def verify_health_response(self, expected_status_code, overall_status, database_connection, analytics_api):
    """Verify that the health endpoint returns the expected response.

    Issues a GET against the ``health`` URL and asserts on the HTTP status
    code, the JSON content type, and the decoded JSON body.

    Arguments:
        expected_status_code: HTTP status code the response must carry.
        overall_status: expected value of ``overall_status`` in the body.
        database_connection: expected value of
            ``detailed_status.database_connection`` in the body.
        analytics_api: expected value of ``detailed_status.analytics_api``
            in the body.
    """
    response = self.client.get(reverse('health'))
    self.assertEqual(response.status_code, expected_status_code)
    self.assertEqual(response['content-type'], 'application/json')
    expected = {
        u'overall_status': overall_status,
        u'detailed_status': {
            u'database_connection': database_connection,
            u'analytics_api': analytics_api
        }
    }
    self.assertDictEqual(json.loads(response.content), expected)

def assertUnhealthyAPI(self):
    """Assert a 503 health response where only the analytics API is down.

    Kept as a thin wrapper around ``verify_health_response`` so that any
    remaining caller of the older helper name keeps working.
    """
    self.verify_health_response(
        expected_status_code=503,
        overall_status=u'UNAVAILABLE',
        database_connection=u'OK',
        analytics_api=u'UNAVAILABLE'
    )
Expand All @@ -81,43 +85,62 @@ def test_status(self):
self.assertEqual(response.status_code, 200)

@mock.patch('analyticsclient.status.Status.healthy', mock.PropertyMock(return_value=True))
def test_healthy(self):
    """The health endpoint reports OK everywhere and logs no errors.

    ``Status.healthy`` is patched to return True, so the analytics API is
    seen as healthy; the database is left unpatched.
    """
    with LogCapture(level=logging.ERROR) as log_capture:
        self.verify_health_response(
            expected_status_code=200, overall_status=OK, database_connection=OK, analytics_api=OK
        )
        # check() with no arguments asserts that nothing was captured at
        # ERROR level or above.
        log_capture.check()

@mock.patch('analyticsclient.status.Status.healthy', mock.PropertyMock(return_value=True))
@mock.patch('django.db.backends.BaseDatabaseWrapper.cursor', mock.Mock(side_effect=DatabaseError('example error')))
def test_health_database_outage(self):
    """A database failure yields a 503 and a logged database error.

    The database cursor is patched to raise ``DatabaseError('example error')``
    while the analytics API is patched to report healthy, so only the
    database component should be UNAVAILABLE.
    """
    with LogCapture(level=logging.ERROR) as log_capture:
        self.verify_health_response(
            expected_status_code=503, overall_status=UNAVAILABLE, database_connection=UNAVAILABLE, analytics_api=OK
        )
        # The exception text is interpolated into the message logged by the view.
        log_capture.check(
            ('analytics_dashboard.core.views', 'ERROR', 'Insights database is not reachable: example error')
        )

@mock.patch('analyticsclient.status.Status.healthy', mock.PropertyMock(return_value=False))
def test_health_analytics_api_unhealthy(self):
    """An unhealthy analytics API yields a 503 and a logged failure.

    ``Status.healthy`` is patched to return False (no exception raised), so
    the view is expected to log the plain "health check failed" error.
    """
    with LogCapture(level=logging.ERROR) as log_capture:
        self.verify_health_response(
            expected_status_code=503, overall_status=UNAVAILABLE, database_connection=OK, analytics_api=UNAVAILABLE
        )
        log_capture.check(
            ('analytics_dashboard.core.views', 'ERROR', 'Analytics API health check failed from dashboard')
        )

@mock.patch('analyticsclient.status.Status.healthy', mock.PropertyMock(side_effect=TimeoutError('example error')))
def test_health_analytics_api_unreachable(self):
    """A timed-out analytics API check yields a 503 and a logged timeout.

    Accessing ``Status.healthy`` is patched to raise
    ``TimeoutError('example error')``; the exception text is expected to
    appear in the logged message.
    """
    with LogCapture(level=logging.ERROR) as log_capture:
        self.verify_health_response(
            expected_status_code=503, overall_status=UNAVAILABLE, database_connection=OK, analytics_api=UNAVAILABLE
        )
        log_capture.check((
            'analytics_dashboard.core.views',
            'ERROR',
            'Analytics API health check timed out from dashboard: example error'
        ))

@mock.patch('analyticsclient.status.Status.healthy', mock.PropertyMock(return_value=False))
@mock.patch('django.db.backends.BaseDatabaseWrapper.cursor', mock.Mock(side_effect=DatabaseError('example error')))
def test_health_both_unavailable(self):
    """Both dependencies down: expect a 503 and one error record for each.

    The database cursor is patched to raise ``DatabaseError('example error')``
    and ``Status.healthy`` is patched to return False.
    """
    views_logger = 'analytics_dashboard.core.views'
    # Database error first, then the API failure.
    expected_records = [
        (views_logger, 'ERROR', 'Insights database is not reachable: example error'),
        (views_logger, 'ERROR', 'Analytics API health check failed from dashboard'),
    ]
    with LogCapture(level=logging.ERROR) as captured:
        self.verify_health_response(
            expected_status_code=503,
            overall_status=UNAVAILABLE,
            database_connection=UNAVAILABLE,
            analytics_api=UNAVAILABLE
        )
        captured.check(*expected_records)


class LoginViewTests(RedirectTestCaseMixin, TestCase):
Expand Down
32 changes: 21 additions & 11 deletions analytics_dashboard/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,24 @@
from django.views.generic import View, TemplateView
from django.core.urlresolvers import reverse_lazy
from analyticsclient.client import Client
from analyticsclient.exceptions import ClientError
from analyticsclient.exceptions import TimeoutError

from analytics_dashboard.courses import permissions


logger = logging.getLogger(__name__)
User = get_user_model()

# Health constants
OK = u'OK'
UNAVAILABLE = u'UNAVAILABLE'


def status(_request):
    """Return an empty ``HttpResponse``; the incoming request is not inspected."""
    response = HttpResponse()
    return response


def health(_request):
OK = 'OK'
UNAVAILABLE = 'UNAVAILABLE'

overall_status = analytics_api_status = database_status = UNAVAILABLE

try:
Expand All @@ -36,17 +37,26 @@ def health(_request):
cursor.fetchone()
cursor.close()
database_status = OK
except DatabaseError as e: # pylint: disable=catching-non-exception
logger.exception('Database is not reachable: %s', e)
except DatabaseError as e:
logger.exception('Insights database is not reachable: %s', e)
database_status = UNAVAILABLE

try:
client = Client(base_url=settings.DATA_API_URL, auth_token=settings.DATA_API_AUTH_TOKEN)
if client.status.healthy:
analytics_api_status = OK
except ClientError as e:
logger.exception('API is not reachable from dashboard: %s', e)
client = Client(base_url=settings.DATA_API_URL, auth_token=settings.DATA_API_AUTH_TOKEN, timeout=0.35)
# Note: client.status.healthy sends a request to the health endpoint on
# the Analytics API. The request may throw a TimeoutError. Currently,
# other exceptions are caught by the client.status.healthy method
# itself, which will return False in those cases.
analytics_api_healthy = client.status.healthy
except TimeoutError as e:
logger.exception('Analytics API health check timed out from dashboard: %s', e)
analytics_api_status = UNAVAILABLE
else:
if analytics_api_healthy:
analytics_api_status = OK
else:
logger.error('Analytics API health check failed from dashboard')
analytics_api_status = UNAVAILABLE

overall_status = OK if (analytics_api_status == database_status == OK) else UNAVAILABLE

Expand Down

0 comments on commit fc9471c

Please sign in to comment.