Skip to content

Commit

Permalink
WIP 2023 scorecards import
Browse files Browse the repository at this point in the history
  • Loading branch information
struan committed Jul 13, 2023
1 parent 42d5ef1 commit 41806cd
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 29 deletions.
11 changes: 10 additions & 1 deletion caps/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib import admin
from scoring.models import PlanSection

from caps.models import Council, PlanDocument, DataType, DataPoint
from caps.models import Council, DataPoint, DataType, PlanDocument


class CouncilAdmin(admin.ModelAdmin):
Expand Down Expand Up @@ -45,3 +46,11 @@ class DataPointAdmin(admin.ModelAdmin):


admin.site.register(DataPoint, DataPointAdmin)


@admin.register(PlanSection)
class PlanSectionAdmin(admin.ModelAdmin):
    """Admin configuration for scorecard ``PlanSection`` rows."""

    # Columns shown on the admin change list.
    list_display = ("description", "code", "year")
    # Sidebar filter so sections can be narrowed to one scorecard year.
    list_filter = ("year",)
2 changes: 1 addition & 1 deletion proj/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
RELATED_SEARCH_THRESHOLD = 0.6
RELATED_SEARCH_THRESHOLD_LOOSE = 0.5

PLAN_YEAR = 2021
PLAN_YEAR = 2023

PLAN_SCORECARD_DATASET_DETAILS = {
"org": "mysociety",
Expand Down
179 changes: 179 additions & 0 deletions scoring/management/commands/import_actions_scores.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import math
import os
import re
import shutil
import tempfile
import zipfile
from datetime import date
from os.path import join
from pathlib import Path
from typing import Optional, Union

import pandas as pd
import requests
import urllib3
from caps.models import Council
from caps.utils import char_from_text, integer_from_text
from django.conf import settings
from django.core.files import File
from django.core.management.base import BaseCommand, CommandError
from django.db.models import F, Sum
from django.template.defaultfilters import pluralize
from scoring.models import (PlanQuestion, PlanQuestionScore, PlanScore,
PlanSection, PlanSectionScore)


class Command(BaseCommand):
    """Import scorecard section scores and overall scores for ``YEAR``.

    The command reads two CSV files from ``SCORECARD_DATA_DIR``:

    * ``raw_sections_marks.csv`` - one row per council and section, with
      the raw ``score`` and ``max_score``.
    * ``all_section_scores.csv`` - one row per council, with the overall
      totals and one weighted-score column per section description.

    It then flags the top performing councils overall (per scoring group)
    and per section. Every query is scoped to ``YEAR`` so that data
    imported for other scorecard years is left untouched.
    """

    help = "Imports plan scores"

    # NOTE(review): hard-coded for the WIP import; the trailing comment
    # suggests this is meant to follow settings.PLAN_YEAR - confirm.
    YEAR = 2023  # settings.PLAN_YEAR
    SCORECARD_DATA_DIR = Path(settings.DATA_DIR, "scorecard_data", str(YEAR))
    SECTION_SCORES_CSV = Path(SCORECARD_DATA_DIR, "raw_sections_marks.csv")
    OVERALL_SCORES_CSV = Path(SCORECARD_DATA_DIR, "all_section_scores.csv")

    # Number of councils flagged as overall top performers in each scoring
    # group. Groups not listed use the default; a count of 0 means no
    # council in that group is flagged.
    DEFAULT_TOP_PERFORMER_COUNT = 10
    TOP_PERFORMER_COUNT = {
        "district": 3,
        "county": 1,
        "single": 3,
        "northern-ireland": 0,
        "combined": 1,
    }

    # Section codes for which no per-section top performers are flagged.
    SKIP_SECTION_PERFORMERS = ["s6_cb"]

    # Section code -> description. The description doubles as the value of
    # the "section" column in the section CSV and as the per-section column
    # name in the overall CSV.
    SECTIONS = {
        "s1_b_h": "Buildings & Heating",
        "s2_tran": "Transport",
        "s3_p_lu": "Planning & Land Use",
        "s4_g_f": "Governance & Finance",
        "s5_bio": "Biodiversity",
        "s6_c_e": "Collaboration & Engagement",
        "s7_wr_f": "Waste Reduction & Food",
        "s1_b_h_gs_ca": "Buildings & Heating & Green Skills (CA)",
        "s2_tran_ca": "Transport (CA)",
        "s3_p_b_ca": "Planning & Biodiversity (CA)",
        "s4_g_f_ca": "Governance & Finance (CA)",
        "s5_c_e_ca": "Collaboration & Engagement (CA)",
    }

    def create_sections(self):
        """Ensure a ``PlanSection`` exists for every entry in ``SECTIONS``.

        Sections are keyed on ``(code, year)`` so that re-running the
        command after editing a description updates the existing row
        instead of creating a duplicate section with the same code.
        """
        for code, desc in self.SECTIONS.items():
            PlanSection.objects.update_or_create(
                code=code,
                year=self.YEAR,
                defaults={"description": desc},
            )

    def _sections_by_description(self):
        """Return this year's sections as a ``{description: section}`` dict."""
        return {
            section.description: section
            for section in PlanSection.objects.filter(year=self.YEAR)
        }

    def _get_section(self, sections, description):
        """Look up a section by description.

        Raises:
            CommandError: if no section with that description exists for
                ``YEAR``.
        """
        try:
            return sections[description]
        except KeyError:
            raise CommandError(
                "No {} plan section with description: {}".format(
                    self.YEAR, description
                )
            )

    def import_section_scores(self):
        """Import raw per-section scores from ``SECTION_SCORES_CSV``.

        Rows whose section is ``"overall"`` are ignored, and rows for
        councils that are not in the database are reported and skipped.
        A missing score is stored as 0.

        Raises:
            CommandError: if a row names a section that does not exist for
                ``YEAR``.
        """
        # Load the sections once rather than querying for every CSV row.
        sections = self._sections_by_description()

        df = pd.read_csv(self.SECTION_SCORES_CSV)
        for _, row in df.iterrows():
            if row["section"] == "overall":
                continue

            section = self._get_section(sections, row["section"])

            try:
                council = Council.objects.get(gss_code=row["council"])
            except Council.DoesNotExist:
                self.stdout.write(
                    "Did not find council in db: {}".format(row["council"])
                )
                continue

            plan_score, _ = PlanScore.objects.get_or_create(
                council=council, year=self.YEAR
            )

            score = 0
            if not pd.isnull(row["score"]):
                score = integer_from_text(row["score"])

            max_score = integer_from_text(row["max_score"])

            PlanSectionScore.objects.update_or_create(
                plan_section=section,
                plan_score=plan_score,
                defaults={"max_score": max_score, "score": score},
            )

    def import_overall_scores(self):
        """Import overall totals and weighted section scores.

        Reads ``OVERALL_SCORES_CSV``. Totals are stored as percentages
        rounded to three decimal places, and weighted section scores as
        whole percentages. Councils that are not in the database are
        reported and skipped, and sections with no value for a council
        are left alone.

        Raises:
            CommandError: if a section in ``SECTIONS`` does not exist for
                ``YEAR``.
        """
        sections = self._sections_by_description()

        df = pd.read_csv(self.OVERALL_SCORES_CSV)
        for _, row in df.iterrows():
            try:
                council = Council.objects.get(gss_code=row["gss"])
            except Council.DoesNotExist:
                self.stdout.write(
                    "Did not find council in db: {}".format(row["name"])
                )
                continue

            plan_score, _ = PlanScore.objects.update_or_create(
                council=council,
                year=self.YEAR,
                defaults={
                    "total": round(row["raw_total"] * 100, 3),
                    "weighted_total": round(row["weighted_total"] * 100, 3),
                },
            )

            for desc in self.SECTIONS.values():
                # Not every section has a value for every council.
                if pd.isnull(row[desc]):
                    continue

                PlanSectionScore.objects.update_or_create(
                    plan_section=self._get_section(sections, desc),
                    plan_score=plan_score,
                    defaults={"weighted_score": round(row[desc] * 100)},
                )

    def label_top_performers(self):
        """Flag the top performing councils for ``YEAR``.

        Overall: within each scoring group, the councils with the highest
        positive weighted total (up to that group's configured count) get
        ``top_performer`` set to the group tag.

        Per section: every section score equal to its maximum gets
        ``top_performer`` set to the section code, except for sections in
        ``SKIP_SECTION_PERFORMERS``.
        """
        plan_sections = PlanSection.objects.filter(year=self.YEAR)

        # Reset only this year's labels so that flags from other scorecard
        # years are preserved.
        PlanScore.objects.filter(year=self.YEAR).update(top_performer="")
        PlanSectionScore.objects.filter(plan_score__year=self.YEAR).update(
            top_performer=""
        )

        for group in Council.SCORING_GROUP_CHOICES:
            group_tag = group[0]

            count = self.TOP_PERFORMER_COUNT.get(
                group_tag, self.DEFAULT_TOP_PERFORMER_COUNT
            )
            if count == 0:
                continue

            group_params = Council.SCORING_GROUPS[group_tag]

            # Rank only this year's scores; without the year filter a
            # council's score from another year could take a top slot.
            top_plan_scores = PlanScore.objects.filter(
                year=self.YEAR,
                council__authority_type__in=group_params["types"],
                council__country__in=group_params["countries"],
                weighted_total__gt=0,
            ).order_by("-weighted_total")

            for plan_score in top_plan_scores[:count]:
                plan_score.top_performer = group_tag
                plan_score.save()

        for section in plan_sections:
            if section.code in self.SKIP_SECTION_PERFORMERS:
                continue

            # The sections are already limited to this year, so filtering
            # on the section also limits the scores to this year.
            top_section_scores = PlanSectionScore.objects.filter(
                plan_section=section, score=F("max_score")
            )

            for section_score in top_section_scores:
                section_score.top_performer = section.code
                section_score.save()

    def handle(self, *args, **options):
        """Run the full import: sections, scores, then top performers."""
        self.create_sections()
        self.import_section_scores()
        self.import_overall_scores()
        self.label_top_performers()
47 changes: 20 additions & 27 deletions scoring/views.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,23 @@
from collections import defaultdict
from datetime import date

from django.views.generic import DetailView, TemplateView
from caps.models import Council, Promise
from caps.views import BaseLocationResultsView
from django.conf import settings
from django.contrib.auth.views import LoginView, LogoutView
from django.db.models import Subquery, OuterRef, Count, Sum, F
from django.db.models import Count, F, OuterRef, Subquery, Sum
from django.shortcuts import resolve_url
from django.utils.text import Truncator
from django.utils.decorators import method_decorator
from django.utils.text import Truncator
from django.views.decorators.cache import cache_control
from django.conf import settings

from django.views.generic import DetailView, TemplateView
from django_filters.views import FilterView

from caps.models import Council, Promise
from scoring.models import (
PlanScore,
PlanScoreDocument,
PlanSection,
PlanSectionScore,
PlanQuestion,
PlanQuestionScore,
)
from scoring.filters import PlanScoreFilter, QuestionScoreFilter

from scoring.forms import ScoringSort

from caps.views import BaseLocationResultsView
from scoring.mixins import CheckForDownPageMixin, AdvancedFilterMixin
from scoring.mixins import AdvancedFilterMixin, CheckForDownPageMixin
from scoring.models import (PlanQuestion, PlanQuestionScore, PlanScore,
PlanScoreDocument, PlanSection, PlanSectionScore)

cache_settings = {
"max-age": 60,
Expand Down Expand Up @@ -68,14 +59,14 @@ def get_queryset(self):
authority_type = self.get_authority_type()
qs = Council.objects.annotate(
score=Subquery(
PlanScore.objects.filter(council_id=OuterRef("id"), year="2021").values(
"weighted_total"
)
PlanScore.objects.filter(
council_id=OuterRef("id"), year=settings.PLAN_YEAR
).values("weighted_total")
),
top_performer=Subquery(
PlanScore.objects.filter(council_id=OuterRef("id"), year="2021").values(
"top_performer"
)
PlanScore.objects.filter(
council_id=OuterRef("id"), year=settings.PLAN_YEAR
).values("top_performer")
),
).order_by(F("score").desc(nulls_last=True))

Expand All @@ -95,7 +86,9 @@ def get_context_data(self, **kwargs):
authority_type = self.get_authority_type()

councils = context["object_list"].values()
context["plan_sections"] = PlanSection.objects.filter(year=2021).all()
context["plan_sections"] = PlanSection.objects.filter(
year=settings.PLAN_YEAR
).all()

context = self.setup_filter_context(context, context["filter"], authority_type)

Expand Down Expand Up @@ -199,7 +192,7 @@ def get_context_data(self, **kwargs):
)

promises = Promise.objects.filter(council=council).all()
plan_score = PlanScore.objects.get(council=council, year=2021)
plan_score = PlanScore.objects.get(council=council, year=settings.PLAN_YEAR)
plan_urls = PlanScoreDocument.objects.filter(plan_score=plan_score)
sections = PlanSectionScore.sections_for_council(
council=council, plan_year=settings.PLAN_YEAR
Expand Down Expand Up @@ -487,7 +480,7 @@ def get_context_data(self, **kwargs):
# questions = PlanQuestion.objects.all()
# sections = PlanSection.objects.all()

section_qs = PlanSection.objects.filter(year=2021)
section_qs = PlanSection.objects.filter(year=settings.PLAN_YEAR)

sections = {}
for section in section_qs.all():
Expand Down

0 comments on commit 41806cd

Please sign in to comment.