From d78693f72e1c346de1b4259f99ac6a36ab077ad3 Mon Sep 17 00:00:00 2001 From: fredkingham Date: Wed, 27 Jun 2018 13:28:45 +0100 Subject: [PATCH 1/4] adds in pagination for search #refs opal #1565 and opal #1557 --- search/queries.py | 123 +++++++++++++++------------------------------- search/views.py | 41 +++++++++------- 2 files changed, 61 insertions(+), 103 deletions(-) diff --git a/search/queries.py b/search/queries.py index 6fedee13..b3ae59d1 100644 --- a/search/queries.py +++ b/search/queries.py @@ -2,49 +2,14 @@ Allow us to make search queries """ import datetime -from django.db.models import Max +from django.db.models import Max, Min from django.conf import settings from opal import models -from opal.core import subrecords from opal.utils import stringport from search.search_rules import SearchRule -class PatientSummary(object): - def __init__(self, episode): - self.start = episode.start - self.end = episode.end - self.episode_ids = set([episode.id]) - self.patient_id = episode.patient.id - self.categories = set([episode.category_name]) - self.id = episode.patient.demographics_set.get().id - - def update(self, episode): - if not self.start: - self.start = episode.start - elif episode.start: - if self.start > episode.start: - self.start = episode.start - - if not self.end: - self.end = episode.end - elif episode.end: - if self.end < episode.end: - self.end = episode.end - - self.episode_ids.add(episode.id) - self.categories.add(episode.category_name) - - def to_dict(self): - result = {k: getattr(self, k) for k in [ - "patient_id", "start", "end", "id" - ]} - result["categories"] = sorted(self.categories) - result["count"] = len(self.episode_ids) - return result - - def episodes_for_user(episodes, user): """ Given an iterable of EPISODES and a USER, return a filtered @@ -58,6 +23,7 @@ class QueryBackend(object): """ Base class for search implementations to inherit from """ + def __init__(self, user, query): self.user = user self.query = query @@ -74,14 +40,11 @@ def description(self): def get_patients(self): raise NotImplementedError() - def get_patient_summaries(self): + def get_patient_summaries(self, patients): raise NotImplementedError() - def patients_as_json(self): - patients = self.get_patients() - return [ - p.to_dict(self.user) for p in patients - ] + def sort_patients(self, patients): + raise NotImplementedError() class DatabaseQuery(QueryBackend): @@ -124,36 +87,44 @@ def episodes_for_criteria(self, criteria): search_rule = SearchRule.get_rule(rule_name, self.user) return search_rule.query(criteria) - def get_aggregate_patients_from_episodes(self, episodes): - # at the moment we use start/end only - patient_summaries = {} - - for episode in episodes: - patient_id = episode.patient_id - if patient_id in patient_summaries: - patient_summaries[patient_id].update(episode) - else: - patient_summaries[patient_id] = PatientSummary(episode) - - patients = models.Patient.objects.filter( - id__in=list(patient_summaries.keys()) + def get_patients(self): + episodes = self.get_episodes() + patient_ids = set([i.patient_id for i in episodes]) + return self.sort_patients( + models.Patient.objects.filter(id__in=patient_ids) ) - patients = patients.prefetch_related("demographics_set") - - results = [] - - for patient_id, patient_summary in patient_summaries.items(): - patient = next(p for p in patients if p.id == patient_id) - demographic = patient.demographics_set.get() - result = {k: getattr(demographic, k) for k in [ - "first_name", "surname", "hospital_number", "date_of_birth" - ]} + def sort_patients(self, patients): + patients = patients.annotate( + max_episode_id=Max('episode__id') + ) + return patients.order_by("-max_episode_id") - result.update(patient_summary.to_dict()) - results.append(result) + def get_patient_summary(self, patient): + result = dict() + demographics = patient.demographics_set.first() + for i in ["first_name", "surname", "hospital_number", "date_of_birth"]: + result[i] = getattr(demographics, i) + result["start"] = patient.episode_set.aggregate( + min_start=Min('start') + )["min_start"] + + result["end"] = patient.episode_set.aggregate( + max_end=Max('end') + )["max_end"] + + result["count"] = patient.episode_set.count() + result["patient_id"] = patient.id + result["categories"] = list(patient.episode_set.order_by( + "category_name" + ).values_list( + "category_name", flat=True + )) + return result - return results + def get_patient_summaries(self, patients): + patients.prefetch_related("demographics") + return [self.get_patient_summary(patient) for patient in patients] def _episodes_without_restrictions(self): all_matches = [ @@ -180,22 +151,6 @@ def get_episodes(self): return episodes_for_user( self._episodes_without_restrictions(), self.user) - def get_patient_summaries(self): - eps = self._episodes_without_restrictions() - episode_ids = [e.id for e in eps] - - # get all episodes of patients, that have episodes that - # match the criteria - all_eps = models.Episode.objects.filter( - patient__episode__in=episode_ids - ) - filtered_eps = episodes_for_user(all_eps, self.user) - return self.get_aggregate_patients_from_episodes(filtered_eps) - - def get_patients(self): - patients = set(e.patient for e in self.get_episodes()) - return list(patients) - def description(self): """ Provide a textual description of the current search diff --git a/search/views.py b/search/views.py index c5f06cbb..ee7459bb 100644 --- a/search/views.py +++ b/search/views.py @@ -133,6 +133,20 @@ def _add_pagination(eps, page_number): return results +def get_paginated_reponse(query, patients, page_number): + paginated = _add_pagination(patients, page_number) + paginated_patients = paginated["object_list"] + + # on postgres it blows up if we don't manually manage this + if not paginated_patients: + paginated_patients = models.Patient.objects.none() + + paginated["object_list"] = query.get_patient_summaries( + paginated_patients + ) + return json_response(paginated) + + @with_no_caching @require_http_methods(['GET']) @ajax_login_required @@ -151,7 +165,10 @@ def patient_search_view(request): }] query = queries.create_query(request.user, criteria) - return json_response(query.patients_as_json()) + patients = query.get_patients() + if patients.exists(): + return json_response([patients.first().to_dict(request.user)]) + return json_response([]) @with_no_caching @@ -165,21 +182,7 @@ def simple_search_view(request): query = queries.create_query(request.user, query_string) patients = query.fuzzy_query() - paginated = _add_pagination(patients, page_number) - paginated_patients = paginated["object_list"] - - # on postgres it blows up if we don't manually manage this - if not paginated_patients: - paginated_patients = models.Patient.objects.none() - - episodes = models.Episode.objects.filter( - id__in=paginated_patients.values_list("episode__id", flat=True) - ) - paginated["object_list"] = query.get_aggregate_patients_from_episodes( - episodes - ) - - return json_response(paginated) + return get_paginated_reponse(query, patients, page_number) class ExtractSearchView(View): @@ -201,9 +204,9 @@ def post(self, *args, **kwargs): self.request.user, request_data, ) - patient_summaries = query.get_patient_summaries() - - return json_response(_add_pagination(patient_summaries, page_number)) + return get_paginated_reponse( + query, query.get_patients(), page_number + ) class DownloadSearchView(View): From 8c9ee6b4d4de71cf348f2c691525d0bf006e6aa6 Mon Sep 17 00:00:00 2001 From: fredkingham Date: Wed, 27 Jun 2018 15:06:51 +0100 Subject: [PATCH 2/4] adds prefetch and annoatations to optimise the patient summaries adds additional unit tests refs opal #1565 and opal #1557 --- search/queries.py | 20 +++-- search/tests/test_search_query_integration.py | 75 +++++++++++------- search/tests/test_views.py | 79 ++++++++++++++++++- 3 files changed, 129 insertions(+), 45 deletions(-) diff --git a/search/queries.py b/search/queries.py index b3ae59d1..7599d846 100644 --- a/search/queries.py +++ b/search/queries.py @@ -2,7 +2,7 @@ Allow us to make search queries """ import datetime -from django.db.models import Max, Min +from django.db.models import Max, Min, Count from django.conf import settings from opal import models @@ -105,25 +105,23 @@ def get_patient_summary(self, patient): demographics = patient.demographics_set.first() for i in ["first_name", "surname", "hospital_number", "date_of_birth"]: result[i] = getattr(demographics, i) - result["start"] = patient.episode_set.aggregate( - min_start=Min('start') - )["min_start"] - - result["end"] = patient.episode_set.aggregate( - max_end=Max('end') - )["max_end"] - + result["start"] = patient.min_start + result["end"] = patient.max_end + result["count"] = patient.episode_count result["count"] = patient.episode_set.count() result["patient_id"] = patient.id result["categories"] = list(patient.episode_set.order_by( "category_name" ).values_list( "category_name", flat=True - )) + ).distinct()) return result def get_patient_summaries(self, patients): - patients.prefetch_related("demographics") + patients = patients.prefetch_related("demographics_set") + patients = patients.annotate(episode_count=Count("episode")) + patients = patients.annotate(min_start=Min("episode__start")) + patients = patients.annotate(max_end=Max("episode__end")) return [self.get_patient_summary(patient) for patient in patients] def _episodes_without_restrictions(self): diff --git a/search/tests/test_search_query_integration.py b/search/tests/test_search_query_integration.py index 0dff4e2b..3a36b75b 100644 --- a/search/tests/test_search_query_integration.py +++ b/search/tests/test_search_query_integration.py @@ -3,14 +3,12 @@ """ from datetime import date, datetime -from django.db import transaction from django.contrib.contenttypes.models import ContentType from mock import patch, MagicMock -import reversion from opal.tests.episodes import RestrictedEpisodeCategory from search.search_rules import SearchRule -from opal.models import Synonym, Gender +from opal.models import Synonym, Gender, Patient from opal.core.test import OpalTestCase @@ -24,27 +22,6 @@ from opal.tests.episodes import RestrictedEpisodeCategory # NOQA -class PatientSummaryTestCase(OpalTestCase): - - def test_update_sets_start(self): - patient, episode = self.new_patient_and_episode_please() - summary = queries.PatientSummary(episode) - self.assertEqual(None, summary.start) - the_date = date(day=27, month=1, year=1972) - episode2 = patient.create_episode(start=the_date) - summary.update(episode2) - self.assertEqual(summary.start, the_date) - - def test_update_sets_end(self): - patient, episode = self.new_patient_and_episode_please() - summary = queries.PatientSummary(episode) - self.assertEqual(None, summary.start) - the_date = date(day=27, month=1, year=1972) - episode2 = patient.create_episode(end=the_date) - summary.update(episode2) - self.assertEqual(summary.end, the_date) - - class QueryBackendTestCase(OpalTestCase): def test_fuzzy_query(self): @@ -65,7 +42,15 @@ def test_get_patients(self): def test_get_patient_summaries(self): with self.assertRaises(NotImplementedError): - queries.QueryBackend(self.user, 'aquery').get_patient_summaries() + queries.QueryBackend(self.user, 'aquery').get_patient_summaries( + Patient.objects.all() + ) + + def test_sort_patients(self): + with self.assertRaises(NotImplementedError): + queries.QueryBackend(self.user, 'aquery').sort_patients( + Patient.objects.all() + ) class DatabaseQueryTestCase(OpalTestCase): @@ -122,6 +107,38 @@ def test_episodes_for_number_fields_greater_than(self): query = queries.DatabaseQuery(self.user, [criteria]) self.assertEqual([self.episode], query.get_episodes()) + def test_sort_patients(self): + patient_1, episode_1 = self.new_patient_and_episode_please() + patient_2, episode_2 = self.new_patient_and_episode_please() + patient_1.create_episode() + + not_used_patient, _ = self.new_patient_and_episode_please() + query = queries.DatabaseQuery(self.user, [self.name_criteria]) + result = query.sort_patients( + Patient.objects.exclude(id=not_used_patient.id) + ) + + # should start with patient 1, because its got 2 episodes + self.assertEqual( + result[0].id, patient_1.id, + ) + + self.assertEqual( + result[1].id, patient_2.id, + ) + + # make sure its true even if we reverse it + result = query.sort_patients( + Patient.objects.exclude(id=not_used_patient.id).order_by("-id") + ) + self.assertEqual( + result[0].id, patient_1.id, + ) + + self.assertEqual( + result[1].id, patient_2.id, + ) + def test_episodes_for_number_fields_less_than(self): testmodels.FavouriteNumber.objects.create( patient=self.patient, number=10 @@ -719,9 +736,8 @@ def test_get_episodes_searching_episode_subrecord_ft_or_fk_fields(self): def test_get_patient_summaries(self): query = queries.DatabaseQuery(self.user, self.name_criteria) - summaries = query.get_patient_summaries() + summaries = query.get_patient_summaries(Patient.objects.all()) expected = [{ - 'id': self.patient.id, 'count': 1, 'hospital_number': u'0', 'date_of_birth': self.DATE_OF_BIRTH, @@ -734,7 +750,7 @@ def test_get_patient_summaries(self): }] self.assertEqual(expected, summaries) - def test_update_patient_summaries(self): + def test_get_patient_summaries_for_patient_with_multiple_episodes(self): """ with a patient with multiple episodes we expect it to aggregate these into summaries """ @@ -747,9 +763,8 @@ def test_update_patient_summaries(self): end=end_date ) query = queries.DatabaseQuery(self.user, self.name_criteria) - summaries = query.get_patient_summaries() + summaries = query.get_patient_summaries(Patient.objects.all()) expected = [{ - 'id': self.patient.id, 'count': 3, 'hospital_number': u'0', 'date_of_birth': self.DATE_OF_BIRTH, diff --git a/search/tests/test_views.py b/search/tests/test_views.py index fd635561..9829c5ea 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -11,6 +11,7 @@ from django.test import override_settings from mock import patch, mock_open from opal.core.test import OpalTestCase +from opal.models import Patient from search import views, queries, extract @@ -114,7 +115,6 @@ def setUp(self): u'page_number': 1, u'object_list': [{ u'count': 1, - u'id': self.patient.id, u'first_name': u'Sean', u'surname': u'Connery', u'end': u'15/10/2015', @@ -220,7 +220,7 @@ def test_number_of_queries(self): "James", "Bond", str(i) ) - with self.assertNumQueries(36): + with self.assertNumQueries(35): self.get_response('{}/?query=Bond'.format(self.url)) for i in range(20): @@ -228,7 +228,7 @@ def test_number_of_queries(self): "James", "Blofelt", str(i) ) - with self.assertNumQueries(36): + with self.assertNumQueries(35): self.get_response('{}/?query=Blofelt'.format(self.url)) def test_with_multiple_patient_episodes(self): @@ -252,7 +252,6 @@ def test_with_multiple_patient_episodes(self): "hospital_number": "23422", "date_of_birth": None, "end": None, - "id": 2, "categories": ["Inpatient"] }], "page_number": 1, @@ -261,6 +260,78 @@ def test_with_multiple_patient_episodes(self): self.assertEqual(response, expected) +class GetPaginatedReponseTestCase(OpalTestCase): + def setUp(self): + super(GetPaginatedReponseTestCase, self).setUp() + criteria = [{ + "query_type": "Equals", + "value": "1", + "field": "hospital_number", + 'combine': 'and', + 'rule': u'demographics', + }] + self.query = queries.create_query( + self.user, criteria + ) + + def get_first_names_from_response(self, response): + object_list = json.loads(response.content)["object_list"] + return [i["first_name"] for i in object_list] + + def test_first_page(self): + first_names = [ + "Jane {}".format(i) for i in range(views.PAGINATION_AMOUNT + 2) + ] + for i, name in enumerate(first_names): + patient, _ = self.new_patient_and_episode_please() + patient.demographics_set.update( + first_name=first_names[i] + ) + found = self.get_first_names_from_response(views.get_paginated_reponse( + self.query, Patient.objects.all(), 1 + )) + + self.assertEqual(found, first_names[:views.PAGINATION_AMOUNT]) + + def test_first_page_incomplete(self): + page_count = views.PAGINATION_AMOUNT - 2 + first_names = [ + "Jane {}".format(i) for i in range(page_count) + ] + for i, name in enumerate(first_names): + patient, _ = self.new_patient_and_episode_please() + patient.demographics_set.update( + first_name=first_names[i] + ) + found = self.get_first_names_from_response(views.get_paginated_reponse( + self.query, Patient.objects.all(), 1 + )) + self.assertEqual(found, first_names) + + def test_second_page(self): + first_names = [ + "Jane {}".format(i) for i in range(views.PAGINATION_AMOUNT + 2) + ] + for i, name in enumerate(first_names): + patient, _ = self.new_patient_and_episode_please() + patient.demographics_set.update( + first_name=first_names[i] + ) + found = self.get_first_names_from_response(views.get_paginated_reponse( + self.query, Patient.objects.all(), 2 + )) + + self.assertEqual(found, first_names[views.PAGINATION_AMOUNT:]) + + def test_with_none(self): + self.assertFalse(Patient.objects.exists()) + found = self.get_first_names_from_response(views.get_paginated_reponse( + self.query, Patient.objects.all(), 1 + )) + + self.assertEqual(found, []) + + class SearchTemplateTestCase(OpalTestCase): def test_search_template_view(self): From 8a6482a4fc5e4f3f141f136d764cabeb90cd0bb3 Mon Sep 17 00:00:00 2001 From: fredkingham Date: Thu, 28 Jun 2018 13:51:22 +0100 Subject: [PATCH 3/4] adds an additional unit test, removes a duplicate call to get episode count in the patient summaries as we get this already, makes sure we call so that the prefetch_related works refs opal #1565 and opal #1557 --- search/queries.py | 12 ++++++--- search/tests/test_search_query_integration.py | 26 ++++++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/search/queries.py b/search/queries.py index 7599d846..521dac0e 100644 --- a/search/queries.py +++ b/search/queries.py @@ -102,13 +102,17 @@ def sort_patients(self, patients): def get_patient_summary(self, patient): result = dict() - demographics = patient.demographics_set.first() - for i in ["first_name", "surname", "hospital_number", "date_of_birth"]: - result[i] = getattr(demographics, i) + + # prefetch queries only work with sad times, even though + # demographics are a one per patient thing. + for demographics in patient.demographics_set.all(): + for i in [ + "first_name", "surname", "hospital_number", "date_of_birth" + ]: + result[i] = getattr(demographics, i) result["start"] = patient.min_start result["end"] = patient.max_end result["count"] = patient.episode_count - result["count"] = patient.episode_set.count() result["patient_id"] = patient.id result["categories"] = list(patient.episode_set.order_by( "category_name" diff --git a/search/tests/test_search_query_integration.py b/search/tests/test_search_query_integration.py index 3a36b75b..47a42dc5 100644 --- a/search/tests/test_search_query_integration.py +++ b/search/tests/test_search_query_integration.py @@ -8,7 +8,7 @@ from opal.tests.episodes import RestrictedEpisodeCategory from search.search_rules import SearchRule -from opal.models import Synonym, Gender, Patient +from opal.models import Synonym, Gender, Patient, Episode from opal.core.test import OpalTestCase @@ -365,6 +365,30 @@ def test_fuzzy_query(self): [patient_2, patient_3, patient_1] ) + def test_get_patients(self): + patient_1, episode_1 = self.new_patient_and_episode_please() + patient_2, episode_2 = self.new_patient_and_episode_please() + episode_3 = patient_1.create_episode() + + # these will not be used + self.new_patient_and_episode_please() + + query = queries.DatabaseQuery(self.user, [self.name_criteria]) + with patch.object(query, "get_episodes") as get_episodes: + get_episodes.return_value = Episode.objects.filter( + id__in=[episode_1.id, episode_2.id, episode_3.id] + ) + found = query.get_patients().values_list("id", flat=True) + self.assertEqual( + 2, found.count() + ) + self.assertEqual( + found[0], patient_1.id + ) + self.assertEqual( + found[1], patient_2.id + ) + def test_distinct_episodes_for_m2m_fields_containing_synonsyms_and_names( self ): From 749a6c6c418f7e438b7506064d717f35b10def14 Mon Sep 17 00:00:00 2001 From: fredkingham Date: Fri, 26 Oct 2018 17:00:17 +0100 Subject: [PATCH 4/4] fixes the number of queries run to display the search page, it was 35, now 15 --- search/tests/test_views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/tests/test_views.py b/search/tests/test_views.py index 9829c5ea..0b84de1c 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -220,7 +220,7 @@ def test_number_of_queries(self): "James", "Bond", str(i) ) - with self.assertNumQueries(35): + with self.assertNumQueries(15): self.get_response('{}/?query=Bond'.format(self.url)) for i in range(20): @@ -228,7 +228,7 @@ def test_number_of_queries(self): "James", "Blofelt", str(i) ) - with self.assertNumQueries(35): + with self.assertNumQueries(15): self.get_response('{}/?query=Blofelt'.format(self.url)) def test_with_multiple_patient_episodes(self):