Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] 광역의회 다양성 지수 계산, DB에 저장 #73

Merged
merged 2 commits into from
Nov 27, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 143 additions & 11 deletions analysis/diversity_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
from db.client import client


# ====================================
# Diversity index calculations
# ====================================


def count(data, stair=0):
"""
Returns a counter object of the data, while stairing them to appropriate bins if stair > 0
Expand Down Expand Up @@ -50,7 +55,12 @@ def shannon(data, stair=0, opts=True):
return sh_idx


def save_to_mongo(localId: int, factor: str, stair=0, opts=False) -> None:
# ====================================
# Local council diversity statistics
# ====================================


def save_to_mongo_local(localId: int, factor: str, stair=0, opts=False) -> None:
factor_field = {"age": "age", "gender": "gender", "party": "jdName"}
data = [
councilor[factor_field[factor]]
Expand All @@ -68,6 +78,7 @@ def save_to_mongo(localId: int, factor: str, stair=0, opts=False) -> None:
def calculate_rank_local(factor: str) -> None:
result = client["stats"]["diversity_index"].aggregate(
[
{"$match": {"localId": {"$ne": None}}},
{"$sort": {f"{factor}DiversityIndex": -1}},
{"$group": {"_id": "", "items": {"$push": "$$ROOT"}}},
{"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}},
Expand All @@ -82,12 +93,13 @@ def calculate_rank_local(factor: str) -> None:
)


def calculate_age_diversity_rank_history() -> None:
for councilor_type in ["elected", "candidate"]:
def calculate_age_diversity_rank_history_local() -> None:
for is_elected in [True, False]:
for localId in range(1, 227):
docs = client["stats"]["age_hist"].find(
{
"councilorType": councilor_type,
"councilorType": "local_councilor",
"is_elected": is_elected,
"method": "equal",
"level": 2,
"localId": localId,
Expand All @@ -104,7 +116,8 @@ def calculate_age_diversity_rank_history() -> None:
)
client["stats"]["age_hist"].find_one_and_update(
{
"councilorType": councilor_type,
"councilorType": "local_councilor",
"is_elected": is_elected,
"method": "equal",
"level": 2,
"localId": localId,
Expand All @@ -120,7 +133,8 @@ def calculate_age_diversity_rank_history() -> None:
[
{
"$match": {
"councilorType": councilor_type,
"councilorType": "local_councilor",
"is_elected": is_elected,
"method": "equal",
"level": 2,
"year": year,
Expand All @@ -136,7 +150,8 @@ def calculate_age_diversity_rank_history() -> None:
for doc in result:
client["stats"]["age_hist"].find_one_and_update(
{
"councilorType": councilor_type,
"councilorType": "local_councilor",
"is_elected": is_elected,
"method": "equal",
"level": 2,
"localId": doc["localId"],
Expand All @@ -146,12 +161,129 @@ def calculate_age_diversity_rank_history() -> None:
)


# ====================================
# Metro council diversity statistics
# ====================================


def save_to_mongo_metro(metroId: int, factor: str, stair=0, opts=False) -> None:
factor_field = {"age": "age", "gender": "gender", "party": "jdName"}
data = [
councilor[factor_field[factor]]
for councilor in client["council"]["metro_councilor"].find({"metroId": metroId})
]
# print(f"{metroId} {factor}")
# print(data)
client["stats"].get_collection("diversity_index").update_one(
{"metroId": metroId},
{"$set": {f"{factor}DiversityIndex": gini_simpson(data, stair, opts)}},
upsert=True,
)


def calculate_rank_metro(factor: str) -> None:
result = client["stats"]["diversity_index"].aggregate(
[
{"$match": {"metroId": {"$ne": None}}},
{"$sort": {f"{factor}DiversityIndex": -1}},
{"$group": {"_id": "", "items": {"$push": "$$ROOT"}}},
{"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}},
{"$replaceRoot": {"newRoot": "$items"}},
{"$addFields": {"rank": {"$add": ["$rank", 1]}}},
]
)
for doc in result:
client["stats"]["diversity_index"].find_one_and_update(
{"metroId": doc["metroId"]},
{"$set": {f"{factor}DiversityRank": int(doc["rank"])}},
)


def calculate_age_diversity_rank_history_metro() -> None:
for is_elected in [True, False]:
for metroId in range(1, 18):
docs = client["stats"]["age_hist"].find(
{
"councilorType": "metro_councilor",
"method": "equal",
"level": 1,
"is_elected": is_elected,
"metroId": metroId,
}
)
for doc in docs:
diversity_index = gini_simpson(
[
group["minAge"]
for group in doc["data"]
for _ in range(group["count"])
],
stair=10,
)
client["stats"]["age_hist"].find_one_and_update(
{
"councilorType": "metro_councilor",
"method": "equal",
"level": 1,
"is_elected": is_elected,
"metroId": metroId,
"year": doc["year"],
},
{"$set": {"diversityIndex": diversity_index}},
)

years = list({doc["year"] for doc in client["stats"]["age_hist"].find()})

for year in years:
result = client["stats"]["age_hist"].aggregate(
[
{
"$match": {
"councilorType": "metro_councilor",
"method": "equal",
"level": 1,
"is_elected": is_elected,
"year": year,
}
},
{"$sort": {"diversityIndex": -1}},
{"$group": {"_id": "", "items": {"$push": "$$ROOT"}}},
{"$unwind": {"path": "$items", "includeArrayIndex": "items.rank"}},
{"$replaceRoot": {"newRoot": "$items"}},
{"$addFields": {"rank": {"$add": ["$rank", 1]}}},
]
)
for doc in result:
client["stats"]["age_hist"].find_one_and_update(
{
"councilorType": "metro_councilor",
"method": "equal",
"level": 1,
"is_elected": is_elected,
"metroId": doc["metroId"],
"year": year,
},
{"$set": {"diversityRank": int(doc["rank"])}},
)


if __name__ == "__main__":
# for localId in range(1, 227):
# save_to_mongo(localId, "age", stair=10)
# save_to_mongo(localId, "gender")
# save_to_mongo(localId, "party")
# save_to_mongo_local(localId, "age", stair=10)
# save_to_mongo_local(localId, "gender")
# save_to_mongo_local(localId, "party")
# calculate_rank_local("age")
# calculate_rank_local("gender")
# calculate_rank_local("party")
calculate_age_diversity_rank_history()
# calculate_age_diversity_rank_history_local()

# for metroId in range(1, 18):
# if metroId in [8, 17]:
# continue
# save_to_mongo_metro(metroId, "age", stair=10)
# save_to_mongo_metro(metroId, "gender")
# save_to_mongo_metro(metroId, "party")
# calculate_rank_metro("age")
# calculate_rank_metro("gender")
# calculate_rank_metro("party")
calculate_age_diversity_rank_history_metro()