diff --git a/README.rst b/README.rst
index a99e8be..c238c5d 100644
--- a/README.rst
+++ b/README.rst
@@ -25,6 +25,7 @@ Features
 - Extract degree
 - Extract designation
 - Extract company names
+- Extract LinkedIn URL
 
 Installation
 ============
diff --git a/pyresparser/resume_parser.py b/pyresparser/resume_parser.py
index 8817057..46f8a96 100644
--- a/pyresparser/resume_parser.py
+++ b/pyresparser/resume_parser.py
@@ -62,6 +62,7 @@ def __get_basic_details(self):
                     self.__noun_chunks,
                     self.__skills_file
                 )
+        linkedin = utils.extract_linkedin(self.__text)
         # edu = utils.extract_education(
         #               [sent.string.strip() for sent in self.__nlp.sents]
         #       )
@@ -121,6 +122,8 @@ def __get_basic_details(self):
         self.__details['no_of_pages'] = utils.get_number_of_pages(
                                             self.__resume
                                         )
+
+        self.__details['linkedin'] = linkedin
         return
 
 
diff --git a/pyresparser/utils.py b/pyresparser/utils.py
index 94ea373..4213090 100644
--- a/pyresparser/utils.py
+++ b/pyresparser/utils.py
@@ -494,3 +494,23 @@ def extract_experience(resume_text):
         if x and 'experience' in x.lower()
     ]
     return x
+
+
+def extract_linkedin(text):
+    '''
+    Helper function to extract the first linkedin url from text
+
+    :param text: plain text extracted from resume file
+    :return: linkedin url as string, or None if no url is found
+    '''
+    if not text:
+        return None
+    # Scheme and subdomain are optional so that both
+    # 'linkedin.com/in/x' and 'https://www.linkedin.com/in/x' are
+    # returned whole; the dot is escaped so it only matches a literal '.'
+    match = re.search(
+        r'(?:https?://)?(?:[\w-]+\.)?linkedin\.com/\S+',
+        text,
+        re.IGNORECASE
+    )
+    return match.group(0) if match else None
diff --git a/test_name.py b/test_name.py
index e3fc245..c88e364 100644
--- a/test_name.py
+++ b/test_name.py
@@ -39,3 +39,9 @@ def test_local_name():
 def test_local_phone_number():
     data = get_local_data()
     assert '8087996634' == data['mobile_number']
+
+
+def test_linkedin_url():
+    data = get_remote_data()
+    # no valid linkedin url yet
+    assert data[0]['linkedin'] is None