-
Notifications
You must be signed in to change notification settings - Fork 0
/
warangalrural_Rabi_2018-19.py
95 lines (68 loc) · 2.44 KB
/
warangalrural_Rabi_2018-19.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# To add a new cell, type '#%%'
# To add a new markdown cell, type '#%% [markdown]'
#%% Change working directory from the workspace root to the ipynb file location. Turn this addition off with the DataScience.changeDirOnImportExport setting
# ms-python.python added
import os
# Best-effort switch to the directory the notebook exporter pointed at.
# The relative path is machine-specific, so a failure here is expected on
# other machines and must not stop the script.
try:
    os.chdir(os.path.join(os.getcwd(), '../../../../../../../../../var/folders/zq/b5ts9cfj21n_2v_vp7p8w4hr0000gn/T'))
    print(os.getcwd())
except OSError:
    # Only filesystem errors (missing directory, permissions, ...) are
    # ignored; KeyboardInterrupt/SystemExit are no longer swallowed.
    pass
#%%
import pandas as pd
#%%
# Load the 'RABI' sheet of the workbook into a dataframe.
df = pd.read_excel("warangal_rural.xlsx", sheet_name='RABI')
#%%
# Drop summary rows: rows whose first column mentions "total" and rows
# whose second column mentions "division" (both matched case-insensitively;
# missing cells count as "no match").
is_total_row = df[df.columns[0]].str.lower().str.contains('total', na=False)
df = df[~is_total_row]
is_division_row = df[df.columns[1]].str.lower().str.contains('division', na=False)
df = df[~is_division_row]
# Discard rows, then columns, in which every cell is NaN.
df = df.dropna(how='all').dropna(how='all', axis=1)
# Discard rows in which the first three columns are all empty.
leading_columns = [df.columns[position] for position in range(3)]
df = df.dropna(subset=leading_columns, how='all')
#%%
# Give every column a positional label (Col0, Col1, ...) so that later
# cells can address columns by their position in the cleaned sheet.
rows, cols = df.shape
col_list = list(map('Col{}'.format, range(cols)))
df.columns = col_list
#%%
# Empty output dataframe with the target column layout; it is filled in
# column by column by the cells below.
colms = [
    'year',
    "season",
    "districtName",
    "mandalName",
    "crop",
    "normalAreaSown",
    "actualAreaSown",
]
df_all = pd.DataFrame(columns=colms)
#%%
# Renumber the rows 0..n-1 so the label-based lookups below line up with
# row positions after the filtering above.
df = df.reset_index(drop=True)
#%%
# Crop names are read from row 0, from the third column (Col2) onwards.
# Empty (NaN) cells are skipped; line breaks inside a name become spaces,
# surrounding whitespace is trimmed and the name is capitalized.
header_cells = df.loc[0:0, col_list[2:]].values.tolist()[0]
crop_list = []
for cell in header_cells:
    if str(cell) == 'nan':
        continue
    crop_list.append(cell.replace('\n', ' ').strip().capitalize())
#%%
# Mandal names come from Col1, from row 2 downwards, skipping NaN cells.
# NOTE(review): names start at row 2 while the area values in this script
# are read from row 3 - presumably row 2 has no name in Col1; confirm
# against the workbook.
raw_mandals = df['Col1'][2:].values.tolist()
mandal_list = [name.capitalize() for name in raw_mandals if str(name) != 'nan']
mandal_num = len(mandal_list)
# The output holds one row per (crop, mandal) pair, so the whole mandal
# list is repeated once for every crop.
mandal_names = [name for _crop in crop_list for name in mandal_list]
df_all['mandalName'] = mandal_names
#%%
# Crop column: each crop name is repeated once per mandal, so the crops
# appear in consecutive runs of len(mandal_list) rows.
crop_names = [crop for crop in crop_list for _mandal in mandal_list]
df_all['crop'] = crop_names
#%%
# Fill the normal and actual sown-area columns, one crop at a time.
# Crop number k owns output rows [k * mandal_num, (k + 1) * mandal_num).
# Its values are read from source column Col(6 + 6k) for the normal area
# and Col(7 + 6k) for the actual area, from row 3 downwards.
# NOTE(review): the offsets 6/7 and the stride of 6 presumably mirror the
# sheet layout (six columns per crop) - confirm against the workbook.
normal_pos = df_all.columns.get_loc('normalAreaSown')
actual_pos = df_all.columns.get_loc('actualAreaSown')
for crop_num in range(len(crop_list)):
    first_row = mandal_num * crop_num
    last_row = first_row + mandal_num
    # Assign through .iloc on df_all itself. The chained form
    # df_all[col][a:b] = ... writes into an intermediate Series, which
    # pandas does not guarantee to propagate to the frame and which never
    # propagates under Copy-on-Write. .to_numpy() keeps the write purely
    # positional (no index alignment), matching the slice assignment.
    df_all.iloc[first_row:last_row, normal_pos] = df[col_list[6 + crop_num * 6]].iloc[3:].to_numpy()
    df_all.iloc[first_row:last_row, actual_pos] = df[col_list[7 + crop_num * 6]].iloc[3:].to_numpy()
#%%
# Columns that hold the same value on every row: year, season and
# district name.
constant_columns = {
    'year': '2018-2019',
    'season': "Rabi",
    'districtName': "Warangal (Rural)",
}
for column_name, constant_value in constant_columns.items():
    df_all[column_name] = constant_value
#%%
# Write the assembled table to CSV without the row index.
df_all.to_csv('warangalrural_Rabi_2018-19.csv', index=False)
#%%