# test.py

# -*- coding: utf-8 -*-
"""resumeevaluation.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1oozfH5DPJbliy5I4HhY7X4f2xXZZ9GTv
"""

import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
import joblib

# Load the labelled resume dataset from the current working directory.
resumedata = pd.read_csv('resumedata.csv')

# Quick look at the leading rows to confirm the file loaded as expected.
preview_rows = resumedata.head()
print(preview_rows)

# Keep the distinct textual category labels: the 'Category' column is
# overwritten with integer codes later in the script.
category_column = resumedata['Category']
original_job_categories = category_column.unique().tolist()
print(original_job_categories)

def cleanResume(resumeText):
    """Normalise raw resume text before it is vectorised.

    The steps run in order: drop URLs, drop the tokens ``RT``/``cc``, drop
    hashtags and mentions, replace ASCII punctuation and non-ASCII characters
    with spaces, and finally collapse every run of whitespace to one space.

    Args:
        resumeText: Raw resume text.

    Returns:
        The cleaned text. Leading/trailing whitespace is collapsed to a
        single space, not stripped.
    """
    # Raw strings keep the regex escapes (\S, \s, \x..) intact; without the
    # backslashes the patterns match literal letters such as "S" and "s".
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)  # remove URLs
    # NOTE: no word boundaries, so "RT"/"cc" are also removed inside longer words.
    resumeText = re.sub(r'RT|cc', ' ', resumeText)  # remove RT and cc
    resumeText = re.sub(r'#\S+', '', resumeText)  # remove hashtags
    resumeText = re.sub(r'@\S+', '  ', resumeText)  # remove mentions
    # Full ASCII punctuation set, including the backslash.
    resumeText = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""), ' ', resumeText)  # remove punctuations
    resumeText = re.sub(r'[^\x00-\x7f]', ' ', resumeText)  # remove non-ASCII characters
    resumeText = re.sub(r'\s+', ' ', resumeText)  # remove extra whitespace
    return resumeText

# Clean every resume into a scratch column first so the result can be
# previewed before it replaces the raw text.
resumedata['cleaned_resume'] = resumedata['Resume'].apply(cleanResume)
print(resumedata['cleaned_resume'].head())

# Move the cleaned text into 'Resume'; pop() removes the scratch column
# and hands back its values in a single step.
resumedata['Resume'] = resumedata.pop('cleaned_resume')

# Notebook inspection leftovers: both expressions are evaluated and discarded.
resumedata['Category'].unique()
resumedata['Resume']

# Replace the textual labels in each listed column with integer codes.
var_mod = ['Category']
le = LabelEncoder()
for column in var_mod:
    resumedata[column] = le.fit_transform(resumedata[column])

# Refit a fresh encoder on the label names captured before encoding, so that
# `le` can translate integer codes back into category names later on.
le = LabelEncoder()
le.fit(original_job_categories)

# Category name -> position of that name in the encoder's class list,
# built in a single pass instead of one list search per class.
d = {name: code for code, name in enumerate(le.classes_)}

# Model inputs: the cleaned resume text and the encoded category labels.
requiredTarget = resumedata['Category'].values
requiredText = resumedata['Resume'].values

# TF-IDF settings: sublinear term-frequency scaling, English stop words,
# and a vocabulary capped at 1500 features.
tfidf_options = dict(sublinear_tf=True, stop_words='english', max_features=1500)
word_vectorizer = TfidfVectorizer(**tfidf_options)

# NOTE(review): the vectorizer is fitted on every resume, including the rows
# that are later held out for testing.
WordFeatures = word_vectorizer.fit(requiredText).transform(requiredText)

# Hold out 20% of the rows for evaluation; random_state=0 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    WordFeatures, requiredTarget, random_state=0, test_size=0.2)
print(X_train.shape)
print(X_test.shape)

# Fit a k-nearest-neighbours classifier with its default settings.
clf = KNeighborsClassifier()
clf.fit(X_train, y_train)
prediction = clf.predict(X_test)

from sklearn.metrics import accuracy_score
# Keep the test accuracy so it can be reported below without predicting twice.
test_accuracy = accuracy_score(y_test, prediction)

print(X_train)
print(y_train)

print('Accuracy of KNeighbors Classifier on training set: {:.2f}'.format(clf.score(X_train, y_train)))
print('Accuracy of KNeighbors Classifier on test set: {:.2f}'.format(test_accuracy))
# "\n" escapes restored so the report is printed on its own lines.
print("\n Classification report for classifier %s:\n%s\n" % (clf, metrics.classification_report(y_test, prediction)))



def predict_results(input_string):
    """Classify a raw resume and list every category with non-zero probability.

    The text is cleaned with ``cleanResume``, vectorised with the module-level
    ``word_vectorizer`` and classified with the module-level ``clf``. The
    predicted category and the positive probabilities are also printed.

    Args:
        input_string: Raw resume text.

    Returns:
        dict with two lists:
        ``"probabilty_list"`` (key spelling kept as-is for existing callers)
        holds the class probabilities greater than zero, and ``"fields"``
        holds the category name for each of those probabilities.
    """
    preprocessed_resume = word_vectorizer.transform([cleanResume(input_string)])
    prediction = clf.predict(preprocessed_resume)
    predicted_category = le.inverse_transform([prediction[0]])

    print(f'The predicted category for the input resume is: {predicted_category[0]}')

    class_probabilities = clf.predict_proba(preprocessed_resume)[0]

    # Pair each probability with its class label by position. Searching the
    # list by value would return the first match for every tied probability,
    # and column j of predict_proba belongs to clf.classes_[j].
    candidates = [
        (proba, int(label))
        for proba, label in zip(class_probabilities, clf.classes_)
        if proba > 0
    ]
    probability = [proba for proba, _ in candidates]
    print(probability)

    # Invert the name -> code mapping once instead of scanning it per class.
    code_to_field = {code: field for field, code in d.items()}
    list_of_predicts = [
        code_to_field[label]
        for _, label in candidates
        if label in code_to_field
    ]

    result = {
        "probabilty_list": probability,
        "fields": list_of_predicts
    }
    return result

# Sample resume text used as a demo input for predict_results below.
input_string = '''RAMESH CHALLA
Productive worker with solid work ethic who exerts optimal
effort in successfully completing tasks. Enthusiastic, fast
learner and eager to face challenges and quickly
assimilate new concepts. Able to communicate
effectively and clarify complex technical issues.

Personal Info
Address
16-2-147/D/4, ANAND NAGAR
MALKPET
HYDERABAD, TELANGANA, 500036
Phone
8099519595
E-mail
rameshyadavchalla@gmail.com

Skills
 C ,Data Structures, RDBMS
 C++ ,Java ,Python
 Html ,CSS ,Java Script ,PHP

Technologies
 Android Development
 Web Development
 Firebase
 My sql
 MongoDB
 Selenium


Interests & Hobbies
 Programming
 Sports
 Music
 Gaming

Languages
 English
 Telugu
 Hindi
Additional Information
 DOB: 19 July 1999
LinkedIn:
linkedin.com/in/challaramesh

Academics

2019-2022

Bachelor of Technology: Computer Science
Engineering(CSE)
Keshav Memorial Institute Of Technology, Hyderabad
 CGPA : 7.8

2016-2019

Diploma: Computer Engineering(DCME)
Government Polytechnic,Masabtank
 Percentage: 90%

2016

SSC
Petros High School, Hyderabad
 GPA: 9.3
Work History

28-10-2021
TO
31-12-2021

03-01-2021

Engineering Intern
OpenText, Hyderabad


Associate Quality Assurance Engineer
OpenText, Hyderabad
Projects
Stock Manager – Android Application
 Major Project, Keshav Memorial Institute Of Technology
Elite Rentals – Full Stack Web Development Application
 Minor Project, Keshav Memorial Institute Of Technology
Phone book – Android Application
 Self Interest
Master Gamerz – Android Application
 Freelancer Project
Accomplishments
 Won the second prize at state level SRUJANA 2018-19
competitions for Alumni Portal (Android Application)
conducted by STATE BOARD OF TECHNICAL EDUCATION.
 Acquired state 38th rank in TSECET 2019.
Declaration        I hereby declare that all the details furnished above are true
to the best of my knowledge'''
# Run the sample through the trained classifier and print the returned dict
# (positive class probabilities and their category names).
print(predict_results(input_string))