forked from ForeseTech/MocksAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
212 lines (166 loc) · 9.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# Import necessary modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the three input tables from CSV files in the working directory
aptitude_test_df, group_discussion_df, mocks_df = (
    pd.read_csv(csv_name)
    for csv_name in ("aptitude_test.csv", "group_discussion.csv", "mocks.csv")
)
def func(pct, allvals):
    """Convert a pie-slice percentage back into an absolute count string.

    Args:
        pct: Share of the whole, expressed as a percentage (0-100).
        allvals: The raw values whose sum represents 100 percent.

    Returns:
        The count ``pct / 100 * sum(allvals)``, rounded to the nearest
        integer and rendered as a decimal string.
    """
    # round() on a NumPy scalar is not guaranteed to hand back a Python int,
    # and the "d" format code rejects floats, so coerce explicitly.
    absolute = int(round(pct / 100. * np.sum(allvals)))
    return "{:d}".format(absolute)
# Apply seaborn's "darkgrid" style
sns.set_style(style="darkgrid")

# Store department as a categorical column
aptitude_test_df["department"] = aptitude_test_df["department"].astype("category")

# Mean aptitude total_score per department, sorted from lowest to highest mean
average_aptitude_score_department = (
    aptitude_test_df.groupby(["department"])["total_score"]
    .mean()
    .sort_values(ascending=True)
)
departments = average_aptitude_score_department.index
average_aptitude_scores = average_aptitude_score_department.values

# One salmon-coloured bar per department
plots = sns.barplot(x=departments, y=average_aptitude_scores, color="salmon")

# Label every bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    plots.annotate(
        f"{bar_height:.2f}",
        (bar_centre, bar_height),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# y-axis ticks at 5, 10, ..., 50
plt.yticks(np.arange(5, 55, 5))

# Axis labels and title
plt.xlabel("Department")
plt.ylabel("Aptitude Test Score Out Of 50")
plt.title("AVERAGE APTITUDE TEST SCORE - DEPARTMENT WISE")
plt.show()
# Store department as a categorical column
group_discussion_df["department"] = group_discussion_df["department"].astype("category")

# Mean group-discussion total_score per department, sorted from lowest to highest mean
average_gd_score_department = (
    group_discussion_df.groupby(["department"])["total_score"]
    .mean()
    .sort_values(ascending=True)
)
departments = average_gd_score_department.index
average_gd_scores = average_gd_score_department.values

# One bar per department, using seaborn's default palette
plots = sns.barplot(x=departments, y=average_gd_scores)

# Label every bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    plots.annotate(
        f"{bar_height:.2f}",
        (bar_centre, bar_height),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# y-axis ticks at 5, 10, ..., 30
plt.yticks(np.arange(5, 35, 5))

# Axis labels and title
plt.xlabel("Department")
plt.ylabel("Group Discussion Score Out Of 30")
plt.title("AVERAGE GROUP DISCUSSION SCORE - DEPARTMENT WISE")
plt.show()
# Convert student_department, student_section, interviewer_name and
# interviewer_company to the category datatype
for column in (
    "student_department",
    "student_section",
    "interviewer_name",
    "interviewer_company",
):
    mocks_df[column] = mocks_df[column].astype("category")

# Convert interview_date to a datetime datatype
# NOTE(review): no explicit format/dayfirst is passed, so parsing relies on
# pandas' inference - confirm it matches the date layout used in mocks.csv
mocks_df["interview_date"] = pd.to_datetime(mocks_df["interview_date"])

# Number of distinct students (unique registration numbers) per department
# who participated in online MOCK PLACEMENTS
students_per_department = mocks_df.groupby(["student_department"])["registration_number"].nunique()

# Total number of students who participated in online MOCK PLACEMENTS
total_students = students_per_department.values.sum()

# Fraction of all students that belongs to each department
students_per_department_normalized = students_per_department / total_students
departments = students_per_department_normalized.index
students_percentage = students_per_department_normalized.values

# Create pie chart; each wedge is labelled with the absolute student count,
# recovered from the wedge percentage by func().
# normalize=True lets matplotlib rescale the wedge sizes itself. With
# normalize=False, pie() raises ValueError when the fractions sum to even
# marginally more than 1, which floating-point rounding can cause.
plt.pie(
    students_percentage,
    labels=departments,
    normalize=True,
    shadow=True,
    autopct=lambda pct: func(pct, students_per_department),
    startangle=90,
)
plt.title("NUMBER OF STUDENTS - DEPARTMENT WISE (ONLINE MOCK PLACEMENTS)")
plt.show()
# Series for the average interview score in each department
average_interview_score_department = (
    mocks_df.groupby(["student_department"])["interview_total"]
    .mean()
    .sort_values(ascending=True)
)
departments = average_interview_score_department.index
average_interview_scores = average_interview_score_department.values

# Create barplot.
# The chart is drawn exactly once: an earlier duplicate of this call referenced
# the undefined name `average_interview_score` (NameError) and would also have
# stacked a second set of bars and labels on the same axes.
plots = sns.barplot(x=departments, y=average_interview_scores)

# Annotate each bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    plots.annotate(
        format(bar.get_height(), '.2f'),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# Set values to be displayed on y-axis (5, 10, ..., 30)
plt.yticks(np.arange(5, 35, 5))

# Set x-axis label, y-axis label and title
plt.xlabel("Department")
plt.ylabel("Interview Score Out Of 30")
plt.title("AVERAGE INTERVIEW SCORE - DEPARTMENT WISE (ONLINE MOCK PLACEMENTS)")
plt.show()
# Derive the per-department average interview score for 20-02-2021.
# interview_date is a datetime column, so it is matched against a real
# Timestamp: matching datetimes against date strings through isin() relies on
# implicit string casting, which recent pandas versions deprecate.
options = [pd.Timestamp(year=2021, month=2, day=20)]

# Keep only the rows whose interview_date is 20-02-2021
student_deets_20 = mocks_df[mocks_df['interview_date'].isin(options)]

# Mean interview_total per department. observed=False is stated explicitly so
# that departments with no interview on this date stay in the result (as NaN),
# which the fillna(0) below depends on.
average_interview_score_department_20 = (
    student_deets_20.groupby(["student_department"], observed=False)["interview_total"]
    .mean()
    .sort_values(ascending=True)
)

# Replace the NaN values (departments without interviews that day) with 0
average_interview_score_department_20_NaN = average_interview_score_department_20.fillna(0)
departments_20 = average_interview_score_department_20.index
average_interview_score_20 = average_interview_score_department_20_NaN.values
print(average_interview_score_department_20_NaN)

# Plot the graph
sns.set_style(style="darkgrid")
plots = sns.barplot(x=departments_20, y=average_interview_score_20)

# Annotate each bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    plots.annotate(
        format(bar.get_height(), '.2f'),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# y-axis ticks at 5, 10, ..., 30
plt.yticks(np.arange(5, 35, 5))
plt.xlabel("Department")
plt.ylabel("Interview Score Out Of 30")
plt.title("AVERAGE INTERVIEW SCORE - DEPARTMENT WISE (ONLINE MOCK PLACEMENTS - 20/02/2021)")
plt.show()
# Derive the per-department average interview score for 21-02-2021.
# interview_date is a datetime column, so it is matched against a real
# Timestamp: matching datetimes against date strings through isin() relies on
# implicit string casting, which recent pandas versions deprecate.
options = [pd.Timestamp(year=2021, month=2, day=21)]

# Keep only the rows whose interview_date is 21-02-2021
student_deets_21 = mocks_df[mocks_df['interview_date'].isin(options)]

# Mean interview_total per department. observed=False is stated explicitly so
# that departments with no interview on this date stay in the result (as NaN),
# which the fillna(0) below depends on.
average_interview_score_department_21 = (
    student_deets_21.groupby(["student_department"], observed=False)["interview_total"]
    .mean()
    .sort_values(ascending=True)
)

# Replace the NaN values (departments without interviews that day) with 0
average_interview_score_department_21_NaN = average_interview_score_department_21.fillna(0)
departments_21 = average_interview_score_department_21.index
average_interview_score_21 = average_interview_score_department_21_NaN.values
print(average_interview_score_department_21_NaN)

# Plot the graph
sns.set_style(style="darkgrid")
plots = sns.barplot(x=departments_21, y=average_interview_score_21)

# Annotate each bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    plots.annotate(
        format(bar.get_height(), '.2f'),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# y-axis ticks at 5, 10, ..., 30
plt.yticks(np.arange(5, 35, 5))
plt.xlabel("Department")
plt.ylabel("Interview Score Out Of 30")
plt.title("AVERAGE INTERVIEW SCORE - DEPARTMENT WISE (ONLINE MOCK PLACEMENTS - 21/02/2021)")
# Show this figure before anything else is plotted; without this call the next
# barplot would be drawn on top of this chart's axes.
plt.show()
# Interview rows recorded per department (non-null registration numbers)
interviews_per_department = mocks_df.groupby(["student_department"])["registration_number"].count()

# Distinct students (unique registration numbers) per department
students_per_department = mocks_df.groupby(["student_department"])["registration_number"].nunique()

# Interviews per student in each department: rows divided by distinct students
average_interview_department = interviews_per_department / students_per_department
departments = average_interview_department.index
average_interview_student = average_interview_department.values

# One bar per department
plots = sns.barplot(x=departments, y=average_interview_student)

# Label every bar with its height (two decimals), 8 points above the bar top
for bar in plots.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2
    plots.annotate(
        f"{bar_height:.2f}",
        (bar_centre, bar_height),
        ha='center',
        va='center',
        size=15,
        xytext=(0, 8),
        textcoords='offset points',
    )

# y-axis ticks at 0.0, 0.25, ..., 2.5
plt.yticks(np.arange(0.0, 2.75, 0.25))

# Axis labels and title
plt.xlabel("Department")
plt.ylabel("Number of Interviews Attended")
plt.title("AVERAGE NUMBER OF INTERVIEWS ATTENDED BY A STUDENT - DEPARTMENT WISE (ONLINE MOCK PLACEMENTS)")
plt.show()