-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmissing.py
47 lines (37 loc) · 1.95 KB
/
missing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
# Raw indicator tables, one CSV file per data set under data/.
raw_poverty_data = pd.read_csv("data/poverty.csv")
raw_human_dev_data = pd.read_csv("data/human_development.csv")
raw_gender_inequality_data = pd.read_csv("data/gender-inequality-index.csv")
raw_world_happiness_data = pd.read_csv("data/happiness.csv")
# on_bad_lines='skip' tells pandas to skip lines it cannot parse in this
# file instead of aborting the load.
raw_sexual_orientation_data = pd.read_csv(
    "data/lgbtq_inclusiveness.csv",
    on_bad_lines='skip',
)
raw_population_data = pd.read_csv("data/population.csv")
raw_religion_data = pd.read_csv("data/religion.csv")


def _country_metric(table, metric):
    """Return the Country and ``metric`` columns of ``table``.

    Rows with a missing value in either column are dropped.
    """
    wanted = ["Country", metric]
    return table[wanted].dropna()


# Per-data-set views reduced to the country name and the one column of interest.
pov_data = _country_metric(raw_poverty_data, "PercentPoverty")
hd_data = _country_metric(raw_human_dev_data, "HDI")
gi_data = _country_metric(raw_gender_inequality_data, "gii")
wh_data = _country_metric(raw_world_happiness_data, "WHScore")
so_data = _country_metric(raw_sexual_orientation_data, "SOScore")
pop_data = _country_metric(raw_population_data, "PopulationPercent")
re_data = _country_metric(raw_religion_data, "Religion")
def lowertrim(val: str) -> str:
    """Return ``val`` with surrounding whitespace removed and lower-cased.

    Used to normalize country names so the same country compares equal
    across data sets that differ only in case or padding.

    Args:
        val: The string to normalize.

    Returns:
        The stripped, lower-cased string.

    Raises:
        AttributeError: If ``val`` has no ``strip`` method (e.g. a float NaN).
    """
    return val.strip().lower()
# Normalize the country names of every indicator table in place so the same
# country compares equal across files despite case or padding differences.
indicator_tables = (
    hd_data,
    pov_data,
    gi_data,
    wh_data,
    so_data,
    pop_data,
    re_data,
)
for table in indicator_tables:
    table['Country'] = table['Country'].apply(lowertrim)

# Countries that appear in every one of the indicator tables.
countries = set.intersection(
    *(set(table["Country"]) for table in indicator_tables)
)

# Data set to check against the common country list.
raw_climate = pd.read_csv("data/climate.csv")
# Unlike the indicator tables, this frame is not passed through dropna(), so
# a missing Country would reach lowertrim as a float and raise
# AttributeError. Normalize only the values that are present; on assignment
# the rows that were dropped realign by index and stay missing.
raw_climate["Country"] = raw_climate["Country"].dropna().apply(lowertrim)

# NOTE(review): the label printed here is "health" although the file being
# checked is data/climate.csv -- confirm which label is intended.
print("health")
# Countries present in every indicator table but absent from the checked file.
print(countries - set(raw_climate["Country"].dropna()))