def split_pairs(self, partition):
    """
    Calculates the population-weighted split pairs score.

    For each locality, the score is the proportion of pairs of people
    living in that locality who are assigned to different districts:
    ``1 - preserved_pairs / all_pairs``, where a population of ``n``
    people forms ``n * (n - 1) / 2`` pairs.  The per-locality scores are
    then averaged, weighted by locality population.

    A locality with fewer than two people contains no pairs, so nothing
    can be split; it is scored as 0 rather than dividing by zero.  If the
    total population over all localities is zero the result is 0.0.

    :param partition: The partition to be scored.  Its ``parts`` must map
        each district to the nodes assigned to it, and
        ``partition.graph.nodes[node][self.pop_col]`` must give the
        population of ``node``.

    :return: Proportion of the pairs of people in the same locality
        who are split into different districts, population-weighted by
        locality.
    """
    # locality -> {district: population of the locality/district overlap}.
    # Only non-empty overlaps are stored; a missing district is equivalent
    # to a zero entry because 0 * (0 - 1) / 2 contributes no pairs.
    overlap_pops = {locality: {} for locality in self.localities}

    nodes = partition.graph.nodes
    for district, members in dict(partition.parts).items():
        for node in members:
            by_district = overlap_pops[self.localitydict[node]]
            by_district[district] = (
                by_district.get(district, 0.0) + nodes[node][self.pop_col]
            )

    weighted_score_sum = 0.0
    population_sum = 0.0

    for locality in self.localities:
        pops = [float(pop) for pop in overlap_pops[locality].values()]
        total_pop = sum(pops)
        all_pairs = total_pop * (total_pop - 1) / 2

        if all_pairs <= 0:
            # Fewer than two people: there is no pair that could be split.
            locality_score = 0.0
        else:
            preserved_pairs = sum(pop * (pop - 1) / 2 for pop in pops)
            locality_score = 1 - preserved_pairs / all_pairs

        weighted_score_sum += locality_score * total_pop
        population_sum += total_pop

    if population_sum == 0:
        # No population anywhere, so the weighted average is undefined;
        # report "nothing split" instead of raising ZeroDivisionError.
        return 0.0

    return weighted_score_sum / population_sum
LocalitySplits("splittings", "county", "pop", ['num_parts', 'num_pieces', 'naked_boundary', 'shannon_entropy', 'power_entropy', - 'symmetric_entropy', 'num_split_localities'])}, + 'symmetric_entropy', 'num_split_localities', 'split_pairs'])}, ) return partition @@ -65,7 +65,7 @@ def split_partition(graph_with_counties): assignment={0: 1, 1: 2, 2: 3, 3: 1, 4: 2, 5: 3, 6: 1, 7: 2, 8: 3}, updaters={"cut_edges":cut_edges, "splits": LocalitySplits("splittings", "county", "pop", ['num_parts', 'num_pieces', 'naked_boundary', 'shannon_entropy', 'power_entropy', - 'symmetric_entropy', 'num_split_localities'])}, + 'symmetric_entropy', 'num_split_localities', 'split_pairs'])}, ) return partition @@ -88,6 +88,7 @@ def test_not_split(self, partition): assert result["power_entropy"] == 0 assert result["symmetric_entropy"] == 18 assert result["num_split_localities"] == 0 + assert result["split_pairs"] == 0 def test_is_split(self, split_partition): part = split_partition.updaters["splits"](split_partition) @@ -100,6 +101,7 @@ def test_is_split(self, split_partition): assert .6 > result["power_entropy"] > .5 assert 32 > result["symmetric_entropy"] > 31 assert result["num_split_localities"] == 3 + assert 1.01 > result["split_pairs"] > 0.99