forked from McGillAISociety/mais-202-coding-challenge-f2019
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MAIS_Peng_Yunkai.py
26 lines (25 loc) · 873 Bytes
/
MAIS_Peng_Yunkai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Compute the average loan amount per home-ownership category, write the
# summary to output.csv, print it, and show it as a bar chart.

# Load both inputs and join them on the shared member id. merge() defaults to
# an inner join, so only members present in both files are kept.
home = pd.read_csv("home_ownership_data.csv")
loan = pd.read_csv("loan_data.csv")
merged = home.merge(loan, on="member_id")

# Select loan_amnt *before* aggregating. Calling .mean() on the whole grouped
# frame also averages unrelated columns (e.g. member_id) and, on pandas >= 2.0,
# raises TypeError as soon as any non-numeric column is present.
# as_index=False keeps home_ownership as a regular column, so the frame has
# the same two-column layout that is written to output.csv.
average = (
    merged.groupby("home_ownership", as_index=False)["loan_amnt"].mean()
)

# Persist the summary once; no intermediate write/read round trips through
# output.csv are needed because everything is already in memory.
average.to_csv("output.csv", index=False)

# Report and plot: one bar per home-ownership category.
print(average)
ax = average.set_index("home_ownership").plot.bar(rot=0)
ax.set_xlabel("Home ownership")
ax.set_ylabel("Average loan amount ($)")
ax.set_title("Average loan amounts per home ownership")
plt.show()