From 69bae9ac2c1a710703958771c6c1e4ebe1a17b47 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Sat, 25 Nov 2023 17:06:36 +0900 Subject: [PATCH 1/2] =?UTF-8?q?[feat]=20=EA=B4=91=EC=97=AD=EC=9D=98?= =?UTF-8?q?=ED=9A=8C=20=EB=8B=A4=EC=96=91=EC=84=B1=20=EC=A7=80=EC=88=98=20?= =?UTF-8?q?=EA=B3=84=EC=82=B0,=20DB=EC=97=90=20=EC=A0=80=EC=9E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/diversity_db.py | 136 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 6 deletions(-) diff --git a/analysis/diversity_db.py b/analysis/diversity_db.py index c0ae7e8..165c0f7 100644 --- a/analysis/diversity_db.py +++ b/analysis/diversity_db.py @@ -5,6 +5,11 @@ from db.client import client +# ==================================== +# Diversity index calculations +# ==================================== + + def count(data, stair=0): """ Returns a counter object of the data, while stairing them to appropriate bins if stair > 0 @@ -50,7 +55,12 @@ def shannon(data, stair=0, opts=True): return sh_idx -def save_to_mongo(localId: int, factor: str, stair=0, opts=False) -> None: +# ==================================== +# Local council diversity statistics +# ==================================== + + +def save_to_mongo_local(localId: int, factor: str, stair=0, opts=False) -> None: factor_field = {"age": "age", "gender": "gender", "party": "jdName"} data = [ councilor[factor_field[factor]] @@ -68,6 +78,7 @@ def save_to_mongo(localId: int, factor: str, stair=0, opts=False) -> None: def calculate_rank_local(factor: str) -> None: result = client["stats"]["diversity_index"].aggregate( [ + {"$match": {"localId": {"$ne": None}}}, {"$sort": {f"{factor}DiversityIndex": -1}}, {"$group": {"_id": "", "items": {"$push": "$$ROOT"}}}, {"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}}, @@ -82,7 +93,7 @@ def calculate_rank_local(factor: str) -> None: ) -def calculate_age_diversity_rank_history() -> None: +def calculate_age_diversity_rank_history_local() -> None: for councilor_type in ["elected", "candidate"]: for localId in range(1, 227): docs = client["stats"]["age_hist"].find( @@ -146,12 +157,125 @@ def calculate_age_diversity_rank_history() -> None: ) +# ==================================== +# Metro council diversity statistics +# ==================================== + + +def save_to_mongo_metro(metroId: int, factor: str, stair=0, opts=False) -> None: + factor_field = {"age": "age", "gender": "gender", "party": "jdName"} + data = [ + councilor[factor_field[factor]] + for councilor in client["council"]["metro_councilor"].find({"metroId": metroId}) + ] + # print(f"{metroId} {factor}") + # print(data) + client["stats"].get_collection("diversity_index").update_one( + {"metroId": metroId}, + {"$set": {f"{factor}DiversityIndex": gini_simpson(data, stair, opts)}}, + upsert=True, + ) + + +def calculate_rank_metro(factor: str) -> None: + result = client["stats"]["diversity_index"].aggregate( + [ + {"$match": {"metroId": {"$ne": None}}}, + {"$sort": {f"{factor}DiversityIndex": -1}}, + {"$group": {"_id": "", "items": {"$push": "$$ROOT"}}}, + {"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}}, + {"$replaceRoot": {"newRoot": "$items"}}, + {"$addFields": {"rank": {"$add": ["$rank", 1]}}}, + ] + ) + for doc in result: + client["stats"]["diversity_index"].find_one_and_update( + {"metroId": doc["metroId"]}, + {"$set": {f"{factor}DiversityRank": int(doc["rank"])}}, + ) + + +def calculate_age_diversity_rank_history_metro() -> None: + for councilor_type in ["elected", "candidate"]: + for metroId in range(1, 18): + docs = client["stats"]["age_hist"].find( + { + "councilorType": councilor_type, + "method": "equal", + "level": 1, + "metroId": metroId, + } + ) + for doc in docs: + diversity_index = gini_simpson( + [ + group["minAge"] + for group in doc["data"] + for _ in range(group["count"]) + ], + stair=10, + ) + client["stats"]["age_hist"].find_one_and_update( + { + "councilorType": councilor_type, + "method": "equal", + "level": 1, + "metroId": metroId, + "year": doc["year"], + }, + {"$set": {"diversityIndex": diversity_index}}, + ) + + years = list({doc["year"] for doc in client["stats"]["age_hist"].find()}) + + for year in years: + result = client["stats"]["age_hist"].aggregate( + [ + { + "$match": { + "councilorType": councilor_type, + "method": "equal", + "level": 1, + "year": year, + } + }, + {"$sort": {"diversityIndex": -1}}, + {"$group": {"_id": "", "items": {"$push": "$$ROOT"}}}, + {"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}}, + {"$replaceRoot": {"newRoot": "$items"}}, + {"$addFields": {"rank": {"$add": ["$rank", 1]}}}, + ] + ) + for doc in result: + client["stats"]["age_hist"].find_one_and_update( + { + "councilorType": councilor_type, + "method": "equal", + "level": 1, + "metroId": doc["metroId"], + "year": year, + }, + {"$set": {"diversityRank": int(doc["rank"])}}, + ) + + if __name__ == "__main__": # for localId in range(1, 227): - # save_to_mongo(localId, "age", stair=10) - # save_to_mongo(localId, "gender") - # save_to_mongo(localId, "party") + # save_to_mongo_local(localId, "age", stair=10) + # save_to_mongo_local(localId, "gender") + # save_to_mongo_local(localId, "party") # calculate_rank_local("age") # calculate_rank_local("gender") # calculate_rank_local("party") - calculate_age_diversity_rank_history() + calculate_age_diversity_rank_history_local() + + # for metroId in range(1, 18): + # if metroId in [8, 17]: + # continue + # save_to_mongo_metro(metroId, "age", stair=10) + # save_to_mongo_metro(metroId, "gender") + # save_to_mongo_metro(metroId, "party") + # calculate_rank_metro("age") + # calculate_rank_metro("gender") + # calculate_rank_metro("party") + # calculate_age_diversity_rank_history_metro() From e329e87fe60d64b6af09cbc826e58bc7b7a330fa Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Mon, 27 Nov 2023 14:12:49 +0900 Subject: [PATCH 2/2] =?UTF-8?q?[fix]=20=EB=8B=A4=EC=96=91=EC=84=B1=20?= =?UTF-8?q?=EC=A7=80=EC=88=98=20=EA=B3=84=EC=82=B0=20=ED=95=A8=EC=88=98=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/diversity_db.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/analysis/diversity_db.py b/analysis/diversity_db.py index 165c0f7..fd8fe01 100644 --- a/analysis/diversity_db.py +++ b/analysis/diversity_db.py @@ -94,11 +94,12 @@ def calculate_rank_local(factor: str) -> None: def calculate_age_diversity_rank_history_local() -> None: - for councilor_type in ["elected", "candidate"]: + for is_elected in [True, False]: for localId in range(1, 227): docs = client["stats"]["age_hist"].find( { - "councilorType": councilor_type, + "councilorType": "local_councilor", + "is_elected": is_elected, "method": "equal", "level": 2, "localId": localId, @@ -115,7 +116,8 @@ def calculate_age_diversity_rank_history_local() -> None: ) client["stats"]["age_hist"].find_one_and_update( { - "councilorType": councilor_type, + "councilorType": "local_councilor", + "is_elected": is_elected, "method": "equal", "level": 2, "localId": localId, @@ -131,7 +133,8 @@ def calculate_age_diversity_rank_history_local() -> None: [ { "$match": { - "councilorType": councilor_type, + "councilorType": "local_councilor", + "is_elected": is_elected, "method": "equal", "level": 2, "year": year, @@ -147,7 +150,8 @@ def calculate_age_diversity_rank_history_local() -> None: for doc in result: client["stats"]["age_hist"].find_one_and_update( { - "councilorType": councilor_type, + "councilorType": "local_councilor", + "is_elected": is_elected, "method": "equal", "level": 2, "localId": doc["localId"], @@ -196,13 +200,14 @@ def calculate_rank_metro(factor: str) -> None: def calculate_age_diversity_rank_history_metro() -> None: - for councilor_type in ["elected", "candidate"]: + for is_elected in [True, False]: for metroId in range(1, 18): docs = client["stats"]["age_hist"].find( { - "councilorType": councilor_type, + "councilorType": "metro_councilor", "method": "equal", "level": 1, + "is_elected": is_elected, "metroId": metroId, } ) @@ -217,9 +222,10 @@ def calculate_age_diversity_rank_history_metro() -> None: ) client["stats"]["age_hist"].find_one_and_update( { - "councilorType": councilor_type, + "councilorType": "metro_councilor", "method": "equal", "level": 1, + "is_elected": is_elected, "metroId": metroId, "year": doc["year"], }, @@ -233,9 +239,10 @@ def calculate_age_diversity_rank_history_metro() -> None: [ { "$match": { - "councilorType": councilor_type, + "councilorType": "metro_councilor", "method": "equal", "level": 1, + "is_elected": is_elected, "year": year, } }, @@ -249,9 +256,10 @@ def calculate_age_diversity_rank_history_metro() -> None: for doc in result: client["stats"]["age_hist"].find_one_and_update( { - "councilorType": councilor_type, + "councilorType": "metro_councilor", "method": "equal", "level": 1, + "is_elected": is_elected, "metroId": doc["metroId"], "year": year, }, @@ -267,7 +275,7 @@ def calculate_age_diversity_rank_history_metro() -> None: # calculate_rank_local("age") # calculate_rank_local("gender") # calculate_rank_local("party") - calculate_age_diversity_rank_history_local() + # calculate_age_diversity_rank_history_local() # for metroId in range(1, 18): # if metroId in [8, 17]: @@ -278,4 +286,4 @@ def calculate_age_diversity_rank_history_metro() -> None: # calculate_rank_metro("age") # calculate_rank_metro("gender") # calculate_rank_metro("party") - # calculate_age_diversity_rank_history_metro() + calculate_age_diversity_rank_history_metro()