-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbluebank.py
64 lines (59 loc) · 1.66 KB
/
bluebank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 20 17:46:15 2023
"""
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the raw loan records from disk and turn them into a DataFrame.
# The encoding is given explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open('loan_data_json.json', encoding='utf-8') as jf:
    df = json.load(jf)
# Dump the parsed JSON object to stdout for a quick visual check.
print(df)
lndf = pd.DataFrame(df)
# Exploratory summaries. These are bare expressions: their results are only
# displayed when the lines are run in an interactive console; when the file
# is run as a script the values are computed and discarded.
lndf['purpose'].unique()
lndf.describe()
for column in ('int.rate', 'fico', 'dti'):
    lndf[column].describe()

# 'log.annual.inc' is exponentiated to obtain the 'annualincome' column.
log_income = lndf['log.annual.inc']
income = np.exp(log_income)
lndf['annualincome'] = income
# Lower bound (inclusive) of each FICO band, highest band first. The bands
# are contiguous: a score belongs to the first band whose lower bound it
# reaches. The 'Fair' band starts at 600 (not 601) so that scores in
# [600, 601) are no longer left without a band.
_FICO_BANDS = (
    (700, 'Excellent'),
    (660, 'Good'),
    (600, 'Fair'),
    (400, 'Poor'),
    (300, 'Very Poor'),
)


def _fico_category(score):
    """Return the category label for a single FICO score.

    Returns 'Unknown' when the score is below 300 or is NaN (every
    comparison with NaN is false), and 'error-unknown' when the value
    cannot be compared with a number at all (e.g. None or a string).
    """
    try:
        for lower_bound, label in _FICO_BANDS:
            if score >= lower_bound:
                return label
    except TypeError:
        # Non-numeric value: keep the best-effort label instead of crashing.
        return 'error-unknown'
    return 'Unknown'


# Map every row's score to its label. The result keeps lndf's own index, so
# the assignment below lines up row-for-row whatever the index labels are.
ficocat = lndf['fico'].map(_fico_category)
lndf['fico.category'] = ficocat
# Label each loan's interest rate with .loc used as a conditional assignment:
# rates above the threshold are 'high', everything at or below it is 'low'.
# The second mask uses <= so that a rate of exactly 0.12 is labelled too
# (with a strict < it matched neither mask and stayed NaN). Rows whose rate
# is missing match neither mask and remain NaN.
rate_threshold = 0.12
lndf.loc[lndf['int.rate'] > rate_threshold, 'int.rate type'] = 'high'
lndf.loc[lndf['int.rate'] <= rate_threshold, 'int.rate type'] = 'low'
# Count the rows (loans) in each FICO category and in each loan purpose.
catplot = lndf.groupby(['fico.category']).size()
pplot = lndf.groupby(['purpose']).size()

# Draw one grey bar chart per count series, each shown as its own figure;
# the two charts differ only in bar width.
for counts, bar_width in ((catplot, 0.1), (pplot, 0.2)):
    counts.plot(kind='bar', color='grey', width=bar_width)
    plt.show()
# Scatter plot of dti against annual income.
xpoint = lndf['annualincome']
ypoint = lndf['dti']
# NOTE: despite their names, ypoint (dti) supplies the horizontal axis and
# xpoint (annual income) the vertical axis; the keywords make that explicit.
plt.scatter(x=ypoint, y=xpoint, color='#4CAF50')
plt.show()
# Observation noted from the plot: high income, less debt, less dti.
# Save the enriched DataFrame as CSV, with the row index written as the
# first column.
output_path = 'loanddatacleaned.csv'
lndf.to_csv(output_path, index=True)