forked from cbitosc/HTF23-Team-54
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
226 lines (176 loc) · 5.98 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# -*- coding: utf-8 -*-
"""resumeevaluation.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1oozfH5DPJbliy5I4HhY7X4f2xXZZ9GTv
"""
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
import joblib
# Location of the labelled resume dataset, resolved against the current
# working directory.
DATASET_PATH = 'resumedata.csv'

# Load the dataset and preview its first rows as a quick sanity check.
resumedata = pd.read_csv(DATASET_PATH)
print(resumedata.head())

# Snapshot the distinct values of the 'Category' column as a plain list and
# show them; this is taken before any later step rewrites that column.
category_column = resumedata['Category']
original_job_categories = category_column.unique().tolist()
print(original_job_categories)
def cleanResume(resumeText):
    """Normalize raw resume text before it is vectorized.

    The following are removed or replaced, in order: URLs, the stand-alone
    tokens ``RT`` and ``cc``, hashtags, mentions, ASCII punctuation and
    non-ASCII characters. Runs of whitespace are then collapsed to a single
    space. Leading/trailing spaces are not stripped.

    Args:
        resumeText: The raw resume text.

    Returns:
        The cleaned text.
    """
    # All patterns are raw strings so that \S, \s, \b and \x.. reach the
    # regex engine as escapes instead of being read as literal letters.
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)  # remove URLs
    # Word boundaries keep ordinary words such as "success" or "ARTS" intact.
    resumeText = re.sub(r'\b(?:RT|cc)\b', ' ', resumeText)  # remove RT and cc
    resumeText = re.sub(r'#\S+', '', resumeText)  # remove hashtags
    resumeText = re.sub(r'@\S+', ' ', resumeText)  # remove mentions
    resumeText = re.sub(
        '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""),
        ' ',
        resumeText,
    )  # remove punctuations
    resumeText = re.sub(r'[^\x00-\x7f]', ' ', resumeText)  # remove non-ASCII characters
    resumeText = re.sub(r'\s+', ' ', resumeText)  # remove extra whitespace
    return resumeText
# --- Clean the resume text ---------------------------------------------------
# Build the cleaned text in a temporary column, preview it, then overwrite the
# 'Resume' column with it and drop the temporary column.
resumedata['cleaned_resume'] = resumedata.Resume.apply(cleanResume)
print(resumedata['cleaned_resume'].head())
resumedata['Resume'] = resumedata['cleaned_resume']
resumedata.drop(columns=['cleaned_resume'], inplace=True)

# --- Encode the target labels ------------------------------------------------
# Fit a single encoder on the original category names and reuse it both for
# encoding the column and, later, for decoding predictions.
var_mod = ['Category']
le = LabelEncoder()
le.fit(original_job_categories)
for column in var_mod:
    resumedata[column] = le.transform(resumedata[column])

# Category name -> encoded integer label (position in le.classes_).
d = {name: index for index, name in enumerate(le.classes_)}

# --- Vectorize the text ------------------------------------------------------
requiredText = resumedata['Resume'].values
requiredTarget = resumedata['Category'].values

word_vectorizer = TfidfVectorizer(
    sublinear_tf=True,
    stop_words='english',
    max_features=1500)
# NOTE(review): the vectorizer is fitted on the full dataset before the
# train/test split, so test-set vocabulary/IDF statistics leak into the
# features — confirm whether that is acceptable for the reported accuracy.
word_vectorizer.fit(requiredText)
WordFeatures = word_vectorizer.transform(requiredText)

# --- Train / test split ------------------------------------------------------
# 80/20 split with a fixed seed so that runs are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    WordFeatures, requiredTarget, random_state=0, test_size=0.2)
print(X_train.shape)
print(X_test.shape)

# --- Train and evaluate the classifier ---------------------------------------
# K-nearest-neighbours classifier with its default hyperparameters.
clf = KNeighborsClassifier()
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)

from sklearn.metrics import accuracy_score

print(X_train)
print(y_train)
print('Accuracy of KNeighbors Classifier on training set: {:.2f}'.format(clf.score(X_train, y_train)))
print('Accuracy of KNeighbors Classifier on test set: {:.2f}'.format(clf.score(X_test, y_test)))
print("\n Classification report for classifier %s:\n%s\n" % (clf, metrics.classification_report(y_test, prediction)))
def predict_results(input_string):
    """Classify a raw resume and list every category with non-zero probability.

    The text is cleaned with ``cleanResume``, vectorized with the module-level
    ``word_vectorizer`` and scored by the module-level classifier ``clf``.
    The top predicted category is printed, followed by the list of non-zero
    class probabilities.

    Args:
        input_string: The raw resume text.

    Returns:
        A dict with two parallel lists:
        ``"probabilty_list"`` holds the non-zero class probabilities and
        ``"fields"`` holds the matching category names, in the same order.
    """
    preprocessed_resume = word_vectorizer.transform([cleanResume(input_string)])

    prediction = clf.predict(preprocessed_resume)
    predicted_category = le.inverse_transform([prediction[0]])
    print(f'The predicted category for the input resume is: {predicted_category[0]}')

    class_probabilities = clf.predict_proba(preprocessed_resume)[0]

    # Pair each probability with the label of its own column. Column j of
    # predict_proba belongs to clf.classes_[j], so pairing by position stays
    # correct when several classes tie on the same probability and when a
    # label is absent from the training split.
    candidates = [
        (float(class_probability), encoded_label)
        for class_probability, encoded_label in zip(class_probabilities, clf.classes_)
        if class_probability > 0
    ]

    probability = [class_probability for class_probability, _ in candidates]
    print(probability)

    encoded_labels = [encoded_label for _, encoded_label in candidates]
    list_of_predicts = le.inverse_transform(encoded_labels).tolist()

    result = {
        "probabilty_list": probability,
        "fields": list_of_predicts,
    }
    return result
# Sample resume text used to exercise predict_results end to end. The literal
# is passed to the classifier exactly as written, so its content (including
# line breaks) is part of the demo input.
input_string = '''RAMESH CHALLA
Productive worker with solid work ethic who exerts optimal
effort in successfully completing tasks. Enthusiastic, fast
learner and eager to face challenges and quickly
assimilate new concepts. Able to communicate
effectively and clarify complex technical issues.
Personal Info
Address
16-2-147/D/4, ANAND NAGAR
MALKPET
HYDERABAD, TELANGANA, 500036
Phone
8099519595
E-mail
Skills
C ,Data Structures, RDBMS
C++ ,Java ,Python
Html ,CSS ,Java Script ,PHP
Technologies
Android Development
Web Development
Firebase
My sql
MongoDB
Selenium
Interests & Hobbies
Programming
Sports
Music
Gaming
Languages
English
Telugu
Hindi
Additional Information
DOB: 19 July 1999
LinkedIn:
linkedin.com/in/challaramesh
Academics
2019-2022
Bachelor of Technology: Computer Science
Engineering(CSE)
Keshav Memorial Institute Of Technology, Hyderabad
CGPA : 7.8
2016-2019
Diploma: Computer Engineering(DCME)
Government Polytechnic,Masabtank
Percentage: 90%
2016
SSC
Petros High School, Hyderabad
GPA: 9.3
Work History
28-10-2021
TO
31-12-2021
03-01-2021
Engineering Intern
OpenText, Hyderabad
Associate Quality Assurance Engineer
OpenText, Hyderabad
Projects
Stock Manager – Android Application
Major Project, Keshav Memorial Institute Of Technology
Elite Rentals – Full Stack Web Development Application
Minor Project, Keshav Memorial Institute Of Technology
Phone book – Android Application
Self Interest
Master Gamerz – Android Application
Freelancer Project
Accomplishments
Won the second prize at state level SRUJANA 2018-19
competitions for Alumni Portal (Android Application)
conducted by STATE BOARD OF TECHNICAL EDUCATION.
Acquired state 38th rank in TSECET 2019.
Declaration I hereby declare that all the details furnished above are true
to the best of my knowledge'''
# Run the classifier on the sample resume and print the dict it returns.
print(predict_results(input_string))