WIP 2023 scorecards import

mysociety · Jul 13, 2023 · 41806cd · 41806cd
1 parent 42d5ef1
commit 41806cd
Show file tree

Hide file tree

Showing 4 changed files with 210 additions and 29 deletions.
diff --git a/caps/admin.py b/caps/admin.py
@@ -1,6 +1,7 @@
 from django.contrib import admin
+from scoring.models import PlanSection
 
-from caps.models import Council, PlanDocument, DataType, DataPoint
+from caps.models import Council, DataPoint, DataType, PlanDocument
 
 
 class CouncilAdmin(admin.ModelAdmin):
@@ -45,3 +46,11 @@ class DataPointAdmin(admin.ModelAdmin):
 
 
 admin.site.register(DataPoint, DataPointAdmin)
+
+
+class PlanSectionAdmin(admin.ModelAdmin):
+    """Admin options for PlanSection.
+
+    The change list shows the description, code and year of each section
+    and offers a sidebar filter on year.
+    """
+
+    list_display = ("description", "code", "year")
+    list_filter = ("year",)
+
+
+# Make PlanSection editable in the admin site using the options above.
+admin.site.register(PlanSection, PlanSectionAdmin)
diff --git a/proj/settings.py b/proj/settings.py
@@ -158,7 +158,7 @@
 RELATED_SEARCH_THRESHOLD = 0.6
 RELATED_SEARCH_THRESHOLD_LOOSE = 0.5
 
-PLAN_YEAR = 2021
+PLAN_YEAR = 2023
 
 PLAN_SCORECARD_DATASET_DETAILS = {
     "org": "mysociety",

diff --git a/scoring/management/commands/import_actions_scores.py b/scoring/management/commands/import_actions_scores.py
@@ -0,0 +1,179 @@
+import math
+import os
+import re
+import shutil
+import tempfile
+import zipfile
+from datetime import date
+from os.path import join
+from pathlib import Path
+from typing import Optional, Union
+
+import pandas as pd
+import requests
+import urllib3
+from caps.models import Council
+from caps.utils import char_from_text, integer_from_text
+from django.conf import settings
+from django.core.files import File
+from django.core.management.base import BaseCommand, CommandError
+from django.db.models import F, Sum
+from django.template.defaultfilters import pluralize
+from scoring.models import (PlanQuestion, PlanQuestionScore, PlanScore,
+                            PlanSection, PlanSectionScore)
+
+
+class Command(BaseCommand):
+    help = "Imports plan scores"
+
+    YEAR = 2023  # settings.PLAN_YEAR
+    SCORECARD_DATA_DIR = Path(settings.DATA_DIR, "scorecard_data", str(YEAR))
+    SECTION_SCORES_CSV = Path(SCORECARD_DATA_DIR, "raw_sections_marks.csv")
+    OVERALL_SCORES_CSV = Path(SCORECARD_DATA_DIR, "all_section_scores.csv")
+
+    DEFAULT_TOP_PERFORMER_COUNT = 10
+    TOP_PERFORMER_COUNT = {
+        "district": 3,
+        "county": 1,
+        "single": 3,
+        "northern-ireland": 0,
+        "combined": 1,
+    }
+
+    SKIP_SECTION_PERFORMERS = ["s6_cb"]
+
+    SECTIONS = {
+        "s1_b_h": "Buildings & Heating",
+        "s2_tran": "Transport",
+        "s3_p_lu": "Planning & Land Use",
+        "s4_g_f": "Governance & Finance",
+        "s5_bio": "Biodiversity",
+        "s6_c_e": "Collaboration & Engagement",
+        "s7_wr_f": "Waste Reduction & Food",
+        "s1_b_h_gs_ca": "Buildings & Heating & Green Skills (CA)",
+        "s2_tran_ca": "Transport (CA)",
+        "s3_p_b_ca": "Planning & Biodiversity (CA)",
+        "s4_g_f_ca": "Governance & Finance (CA)",
+        "s5_c_e_ca": "Collaboration & Engagement (CA)",
+    }
+
+    def create_sections(self):
+        for code, desc in self.SECTIONS.items():
+            section, created = PlanSection.objects.get_or_create(
+                code=code,
+                description=desc,
+                year=self.YEAR,
+            )
+
+    def import_section_scores(self):
+        council_scores = {}
+
+        df = pd.read_csv(self.SECTION_SCORES_CSV)
+        for index, row in df.iterrows():
+            if row["section"] == "overall":
+                continue
+
+            # update the section max_score as we go
+            section = PlanSection.objects.get(
+                description=row["section"],
+            )
+
+            try:
+                council = Council.objects.get(gss_code=row["council"])
+            except Council.DoesNotExist:
+                print("Did not find council in db: {}".format(row["council"]))
+                continue
+
+            plan_score, created = PlanScore.objects.get_or_create(
+                council=council, year=self.YEAR
+            )
+
+            score = 0
+            if not pd.isnull(row["score"]):
+                score = integer_from_text(row["score"])
+
+            max_score = integer_from_text(row["max_score"])
+
+            section_score, created = PlanSectionScore.objects.get_or_create(
+                plan_section=section,
+                plan_score=plan_score,
+            )
+
+            section_score.max_score = max_score
+            section_score.score = score
+            section_score.save()
+
+    def import_overall_scores(self):
+        df = pd.read_csv(self.OVERALL_SCORES_CSV)
+        for index, row in df.iterrows():
+            try:
+                council = Council.objects.get(gss_code=row["gss"])
+            except Council.DoesNotExist:
+                print("Did not find council in db: {}".format(row["name"]))
+                continue
+
+            plan_score, created = PlanScore.objects.get_or_create(
+                council=council, year=self.YEAR
+            )
+
+            plan_score.total = round(row["raw_total"] * 100, 3)
+            plan_score.weighted_total = round(row["weighted_total"] * 100, 3)
+            plan_score.save()
+
+            for desc in self.SECTIONS.values():
+                if not pd.isnull(row[desc]):
+                    section = PlanSection.objects.get(description=desc)
+
+                    section_score, created = PlanSectionScore.objects.get_or_create(
+                        plan_section=section,
+                        plan_score=plan_score,
+                    )
+
+                    section_score.weighted_score = round(row[desc] * 100)
+                    section_score.save()
+
+    def label_top_performers(self):
+        plan_sections = PlanSection.objects.filter(year=2021)
+
+        # reset top performers
+        PlanScore.objects.update(top_performer="")
+        PlanSectionScore.objects.update(top_performer="")
+
+        for group in Council.SCORING_GROUP_CHOICES:
+            group_tag = group[0]
+
+            count = self.TOP_PERFORMER_COUNT.get(
+                group_tag, self.DEFAULT_TOP_PERFORMER_COUNT
+            )
+            if count == 0:
+                continue
+
+            group_params = Council.SCORING_GROUPS[group_tag]
+
+            top_plan_scores = PlanScore.objects.filter(
+                council__authority_type__in=group_params["types"],
+                council__country__in=group_params["countries"],
+                weighted_total__gt=0,
+            ).order_by("-weighted_total")
+
+            for plan_score in top_plan_scores.all()[:count]:
+                plan_score.top_performer = group_tag
+                plan_score.save()
+
+        for section in plan_sections.all():
+            if section.code in self.SKIP_SECTION_PERFORMERS:
+                continue
+
+            top_section_scores = PlanSectionScore.objects.filter(
+                plan_section=section, score=F("max_score")
+            )
+
+            for section_score in top_section_scores.all():
+                section_score.top_performer = section.code
+                section_score.save()
+
+    def handle(self, *args, **options):
+        self.create_sections()
+        self.import_section_scores()
+        self.import_overall_scores()
+        self.label_top_performers()
diff --git a/scoring/views.py b/scoring/views.py
@@ -1,32 +1,23 @@
 from collections import defaultdict
 from datetime import date
 
-from django.views.generic import DetailView, TemplateView
+from caps.models import Council, Promise
+from caps.views import BaseLocationResultsView
+from django.conf import settings
 from django.contrib.auth.views import LoginView, LogoutView
-from django.db.models import Subquery, OuterRef, Count, Sum, F
+from django.db.models import Count, F, OuterRef, Subquery, Sum
 from django.shortcuts import resolve_url
-from django.utils.text import Truncator
 from django.utils.decorators import method_decorator
+from django.utils.text import Truncator
 from django.views.decorators.cache import cache_control
-from django.conf import settings
-
+from django.views.generic import DetailView, TemplateView
 from django_filters.views import FilterView
 
-from caps.models import Council, Promise
-from scoring.models import (
-    PlanScore,
-    PlanScoreDocument,
-    PlanSection,
-    PlanSectionScore,
-    PlanQuestion,
-    PlanQuestionScore,
-)
 from scoring.filters import PlanScoreFilter, QuestionScoreFilter
-
 from scoring.forms import ScoringSort
-
-from caps.views import BaseLocationResultsView
-from scoring.mixins import CheckForDownPageMixin, AdvancedFilterMixin
+from scoring.mixins import AdvancedFilterMixin, CheckForDownPageMixin
+from scoring.models import (PlanQuestion, PlanQuestionScore, PlanScore,
+                            PlanScoreDocument, PlanSection, PlanSectionScore)
 
 cache_settings = {
     "max-age": 60,
@@ -68,14 +59,14 @@ def get_queryset(self):
         authority_type = self.get_authority_type()
         qs = Council.objects.annotate(
             score=Subquery(
-                PlanScore.objects.filter(council_id=OuterRef("id"), year="2021").values(
-                    "weighted_total"
-                )
+                PlanScore.objects.filter(
+                    council_id=OuterRef("id"), year=settings.PLAN_YEAR
+                ).values("weighted_total")
             ),
             top_performer=Subquery(
-                PlanScore.objects.filter(council_id=OuterRef("id"), year="2021").values(
-                    "top_performer"
-                )
+                PlanScore.objects.filter(
+                    council_id=OuterRef("id"), year=settings.PLAN_YEAR
+                ).values("top_performer")
             ),
         ).order_by(F("score").desc(nulls_last=True))
 
@@ -95,7 +86,9 @@ def get_context_data(self, **kwargs):
         authority_type = self.get_authority_type()
 
         councils = context["object_list"].values()
-        context["plan_sections"] = PlanSection.objects.filter(year=2021).all()
+        context["plan_sections"] = PlanSection.objects.filter(
+            year=settings.PLAN_YEAR
+        ).all()
 
         context = self.setup_filter_context(context, context["filter"], authority_type)
 
@@ -199,7 +192,7 @@ def get_context_data(self, **kwargs):
         )
 
         promises = Promise.objects.filter(council=council).all()
-        plan_score = PlanScore.objects.get(council=council, year=2021)
+        plan_score = PlanScore.objects.get(council=council, year=settings.PLAN_YEAR)
         plan_urls = PlanScoreDocument.objects.filter(plan_score=plan_score)
         sections = PlanSectionScore.sections_for_council(
             council=council, plan_year=settings.PLAN_YEAR
@@ -487,7 +480,7 @@ def get_context_data(self, **kwargs):
         # questions = PlanQuestion.objects.all()
         # sections = PlanSection.objects.all()
 
-        section_qs = PlanSection.objects.filter(year=2021)
+        section_qs = PlanSection.objects.filter(year=settings.PLAN_YEAR)
 
         sections = {}
         for section in section_qs.all():