From f348e42cbcb5287c574c613ddf67185baf5f6642 Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Fri, 18 May 2018 17:19:14 -0400 Subject: [PATCH 1/7] #3 showing indexes of experience section --- src/main/java/com/cv/parser/CVparserMain.java | 217 +----------------- .../applicant/ParseApplicantExperience.java | 9 +- 2 files changed, 16 insertions(+), 210 deletions(-) diff --git a/src/main/java/com/cv/parser/CVparserMain.java b/src/main/java/com/cv/parser/CVparserMain.java index 9bd65c2..d40eb29 100644 --- a/src/main/java/com/cv/parser/CVparserMain.java +++ b/src/main/java/com/cv/parser/CVparserMain.java @@ -1,21 +1,9 @@ package com.cv.parser; -import java.awt.Dimension; -import java.awt.Point; -import java.awt.Toolkit; -import java.awt.event.KeyEvent; -import java.awt.image.BufferedImage; import java.io.File; import java.util.ArrayList; import java.util.List; -import javax.swing.JDialog; -import javax.swing.JFrame; -import javax.swing.JMenu; -import javax.swing.JMenuBar; -import javax.swing.JMenuItem; -import javax.swing.JPanel; - import org.eclipse.swt.SWT; import org.eclipse.swt.widgets.Button; import org.eclipse.swt.widgets.Display; @@ -26,10 +14,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.cv.parser.util.ImagePanel; -import com.cv.parser.util.Message; -import com.cv.parser.util.JDialogHelper; -import com.cv.parser.applicant.CVForm; import com.cv.parser.applicant.DocumentDetails; import com.cv.parser.extract.ExtractFiles; import com.cv.parser.extract.MSExtractor; @@ -40,206 +24,23 @@ public class CVparserMain { static Logger logger = LoggerFactory.getLogger(CVparserMain.class); - private static Message message = new Message(); + File resumesStoragePath = new File(CVparserSingleton.getInstance().resumesStoragePath); protected Shell shell; - private static BufferedImage image; - private static JFrame mainFrame; - protected JDialog parentDialog; - /** * Launch the application. * * @param args */ - public static void main(String[] args) { - - // Schedule a job for the event-dispatching thread: //creating and - // Showing this application's GUI. - javax.swing.SwingUtilities.invokeLater(new Runnable() { - public void run() { - createGUIMenu(); - } - }); + public static void main(String[] args) { + try { + CVparserMain window = new CVparserMain(); + window.open(); + } catch (Exception e) { + logger.error("main", e); } - - /* - * Create and setup the Frame Windows GUI - */ - protected static void createGUIMenu() { - mainFrame = new javax.swing.JFrame(message.msg("mainTitle")); // Prepare - // a - // blank - // frame - mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - - JPanel panel = new ImagePanel(); // prepare the JPanel that hosted the - // background image - mainFrame.add(panel); - - CVparserMain app = new CVparserMain(); // start the quiz menu - - mainFrame.setJMenuBar(app.createMenu()); // Set up the menu bar on the - // top of the frame - - Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); - mainFrame.setSize(screenSize.width, screenSize.height); - - mainFrame.setVisible(true); - } - - /** - * Set up the menu bar, on the top of the frame.. Addition: only comments - * March 15 2017 @author RAYMARTHINKPAD This creates the menus in the frame. - * The menu includes items that are event driven on click the action will be - * performed. - */ - public JMenuBar createMenu() { - JMenuBar menuBar = new JMenuBar(); // create the menu bar at the top of - - // Add your testing under the first Menu.. - JMenu fileMenu = new JMenu("Test Parsing using JSON"); // First group will handle master - fileMenu.setMnemonic(KeyEvent.VK_F); // The shortcut is ALT + M (Master - menuBar.add(fileMenu); // add to the menu bar - - // Thje menu item for your test code is here... - JMenuItem parseMenuTest1 = new JMenuItem("Regex JSON style", KeyEvent.VK_D); - parseMenuTest1.setToolTipText("Test parsing Document"); - parseMenuTest1.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - parseMenuTest1MenuItemActionPerformed(evt); - // entryUserMenuItemActionPerformed(evt); - } - }); - - fileMenu.add(parseMenuTest1); - - - // Thje menu item for your test code is here... - JMenuItem parseMenuTest2 = new JMenuItem("You GUI JPanel here", KeyEvent.VK_D); - parseMenuTest2.setToolTipText("Test parsing Document"); - parseMenuTest2.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - parseMenuTest2MenuItemActionPerformed(evt); - } - }); - - fileMenu.add(parseMenuTest2); - - - - - /************ This menu is for my menu test 40046196 *************/ - - // Third Menu in Menu Bar is Window About and Exit - JMenu menu2 = new JMenu("Parse Doc 2 Test"); - menu2.setMnemonic(KeyEvent.VK_H); // Shortcut is ALT + E - menuBar.add(menu2); // add exit menu to menu bar - - // First menu item for windows is About - JMenuItem uploadDocumentToForm = new JMenuItem("Parse Doc to Form", KeyEvent.VK_A); - uploadDocumentToForm.setToolTipText("Parse Doc to Form"); - uploadDocumentToForm.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - uploadDocumentToFormActionPerformed(evt); - } - }); - - menu2.add(uploadDocumentToForm); // add about menu item to exit MEnu - - - return menuBar; - } - - /** - * The events when user click UserForm - * Note from me: - * Should porting the forms in JPanel like previous projects, manageable control form Tree Menu Structure. - * Also using JPanel, would learn how to advance the use of JTable, position, etc. - * For now the menu only call your design procedure directly.. - * - * @param evt - */ - private void parseMenuTest1MenuItemActionPerformed(java.awt.event.ActionEvent evt) { - - try { - this.open(); - } catch (Exception e) { - logger.error("main", e); - } - - -// UserForm userForm = new UserForm(); // create instance UserForm -// JDialog dialog = new JDialog(this.mainFrame, message.msg("userFormTitle"), true); -// -// dialog.add(userForm); -// dialog.pack(); -// -// Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); -// Dimension dialogSize = dialog.getSize(); // get your frame size -// dialog.setLocation(new Point((screenSize.width - dialogSize.width) / 2, -// (screenSize.height - dialogSize.height) / 2)); // to the center -// // of screen -// dialog.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE); -// JDialogHelper.setJDialogTree(dialog, null, null); -// dialog.setVisible(true); // When setVisible this program waiting, until -// // you close the dialog. -// dialog.dispose(); - - } - - private void parseMenuTest2MenuItemActionPerformed(java.awt.event.ActionEvent evt) { - - com.cv.parser.About cvForm = new com.cv.parser.About(); // create instance UserForm - JDialog dialog = new JDialog(this.mainFrame, "CV Form", true); - - dialog.add(cvForm); - dialog.pack(); - - Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); - Dimension dialogSize = dialog.getSize(); // get your frame size - dialog.setLocation(new Point((screenSize.width - dialogSize.width) / 2, - (screenSize.height - dialogSize.height) / 2)); // to the center - // of screen - dialog.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE); - JDialogHelper.setJDialogTree(dialog, null, null); - dialog.setVisible(true); // When setVisible this program waiting, until - // you close the dialog. - dialog.dispose(); - - } - - private void uploadDocumentToFormActionPerformed(java.awt.event.ActionEvent evt) { - - com.cv.parser.applicant.CVForm cvForm = new com.cv.parser.applicant.CVForm(); // create instance UserForm - JDialog dialog = new JDialog(this.mainFrame, "CV Form" , true); - - dialog.add(cvForm); - dialog.pack(); - - Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); - Dimension dialogSize = dialog.getSize(); // get your frame size - dialog.setLocation(new Point((screenSize.width - dialogSize.width) / 2, - (screenSize.height - dialogSize.height) / 2)); // to the center - // of screen - dialog.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE); - JDialogHelper.setJDialogTree(dialog, null, null); - dialog.setVisible(true); // When setVisible this program waiting, until - // you close the dialog. - dialog.dispose(); - - } - - -// public static void main(String[] args) { -// try { -// CVparserMain window = new CVparserMain(); -// window.open(); -// } catch (Exception e) { -// logger.error("main", e); -// } -// } + } /** * Open the window. @@ -355,4 +156,4 @@ protected void createContents() { dd.handleButtonClick(); /**** saving to database END ****/ } -} +} \ No newline at end of file diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index 0b6ad66..9ef6c84 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -6,12 +6,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cv.parser.RegEx; import com.cv.parser.entity.ApplicantDocument; import com.cv.parser.entity.ApplicantExperience; +import com.cv.parser.helper.ParserHelper; import com.cv.parser.helper.WorkExperienceHelper; /** - * Fetches data to be stored in {@link} ApplicantExperience + * Fetches data to be stored in {@link ApplicantExperience} * * @author RAYMARTHINKPAD * @@ -42,7 +44,10 @@ public List getApplicantExperience() { } private WorkExperienceHelper[] findWorkExperience(String line) { - return null; + WorkExperienceHelper[] experiences = null; + ParserHelper parser = new ParserHelper(); + logger.info(parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)+""); + return experiences; } } From a5738e182a4bbdfecb6848511fe794995c99e47c Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Fri, 18 May 2018 23:09:20 -0400 Subject: [PATCH 2/7] #3 more regular expressions for section title --- src/main/java/com/cv/parser/RegEx.java | 8 +++++++- .../applicant/ParseApplicantExperience.java | 19 +++++++++++-------- .../com/cv/parser/helper/ParserHelper.java | 9 ++++++--- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/cv/parser/RegEx.java b/src/main/java/com/cv/parser/RegEx.java index 4f799f5..f4da097 100644 --- a/src/main/java/com/cv/parser/RegEx.java +++ b/src/main/java/com/cv/parser/RegEx.java @@ -6,7 +6,13 @@ public enum RegEx { PHONE ("\\(?([0-9]{3})\\)?[-. ]([0-9]{3})[-. ]?[-. ]?([0-9]{4})"), OBJECTIVE ("\\b(Objective|Objectives|OBJECTIVE|OBJECTIVES)([^-!@#$%^&*()+.,?])\\b"), EDUCATION ("\\b(Education|Educations|EDUCATION|EDUCATIONS)\\b"), - EXPERIENCE ("\\b(Experience|Experiences|EXPERIENCE|EXPERIENCES)\\b"); + EXPERIENCE ("\\b(Experience(s?)|EXPERIENCE(S?))\\b"), + SKILLS ("\\b(Skill(s?)|SKILL(S?)|Expertise(s?)|Skills\\s&?\\sExpertises)\\b"), + LANGUAGE ("\\b(Language(s?)|LANGUAGE(S?))\\b"), + INTEREST ("\\b(Interest(s?)|INTEREST(S?)|Activity|Activities|ACTIVITIES|ACTIVITY)\\b"), + MEMBERSHIP ("\\b(Membership(s?)|MEMBERSHIP(S?))\\b"), + ADDITIONAL ("\\b(Award(s?)|AWARD(S)|Honor(s?)|HONOR(S?)|Certification(s?)|CERTIFICATION(S?)|Accomplishment(s?)|ACCOMPLISHMENT(S?)|Project(s?)|PROJECT(S?))\\b"), + DATEFROMTO ("([A-Za-z]+\\s)?([0-9]{4})\\s[-]\\s([A-Za-z]+\\s)?([0-9]{4})|([A-Za-z]*?\\sat\\s[A-Za-z]+)"); // TODO // there is also SKILLS, LANGUAGES and diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index 9ef6c84..92f56d8 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -2,6 +2,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,7 +15,7 @@ import com.cv.parser.helper.WorkExperienceHelper; /** - * Fetches data to be stored in {@link ApplicantExperience} + * Fetches data to be stored in {@link ApplicantExperience} * * @author RAYMARTHINKPAD * @@ -33,8 +35,7 @@ public void setApplicantExperience() { for (ApplicantDocument ad : appDocList) { ApplicantExperience ae = new ApplicantExperience(); ae.setId(ad.getId()); - ae.setExperience(findWorkExperience(ad.getLine())); - + ae.setExperience(findWorkExperience(ad.getId(), ad.getLine())); this.applicantExperience.add(ae); } } @@ -43,11 +44,13 @@ public List getApplicantExperience() { return applicantExperience; } - private WorkExperienceHelper[] findWorkExperience(String line) { - WorkExperienceHelper[] experiences = null; + private WorkExperienceHelper[] findWorkExperience(int id, String line) { ParserHelper parser = new ParserHelper(); - logger.info(parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)+""); - return experiences; + logger.info(id + ": section indexes of this resume => "+parser.getIndexesOfSection(line).toString()); + logger.info(id + ": index of experience section in this =>"+parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)); + // copy texts starting from experience section index to the following section index + // experience index < following section index + return null; } - + } diff --git a/src/main/java/com/cv/parser/helper/ParserHelper.java b/src/main/java/com/cv/parser/helper/ParserHelper.java index f8e54f6..c4baa41 100644 --- a/src/main/java/com/cv/parser/helper/ParserHelper.java +++ b/src/main/java/com/cv/parser/helper/ParserHelper.java @@ -32,7 +32,8 @@ public ParserHelper() { } public int getIndexOfThisSection(RegEx regEx, String line) { - RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE }; + RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE, RegEx.SKILLS, RegEx.LANGUAGE, + RegEx.INTEREST, RegEx.MEMBERSHIP, RegEx.ADDITIONAL }; List indexOfThisSection = new ArrayList(); for (RegEx r : sectionRegex) { if (r.equals(regEx)) { @@ -56,7 +57,8 @@ public int getIndexOfThisSection(RegEx regEx, String line) { * @return index of each section */ public List getIndexesOfSection(String line) { - RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE }; + RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE, RegEx.SKILLS, RegEx.LANGUAGE, + RegEx.INTEREST, RegEx.MEMBERSHIP, RegEx.ADDITIONAL }; List indexesOfSection = new ArrayList(); for (RegEx r : sectionRegex) { Pattern pattern = Pattern.compile(r.toString(), Pattern.MULTILINE | Pattern.DOTALL); @@ -77,7 +79,8 @@ public List getIndexesOfSection(String line) { * @return indexes that follows regEx section */ public List getIndexesOfSection(RegEx regEx, String line) { - RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE }; + RegEx[] sectionRegex = { RegEx.OBJECTIVE, RegEx.EDUCATION, RegEx.EXPERIENCE, RegEx.SKILLS, RegEx.LANGUAGE, + RegEx.INTEREST, RegEx.MEMBERSHIP, RegEx.ADDITIONAL }; List indexesOfSection = new ArrayList(); for (RegEx r : sectionRegex) { if (!r.equals(regEx)) { From 45864b8c27d5229ed2b260c30fc4cef4a875e629 Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Fri, 18 May 2018 23:31:52 -0400 Subject: [PATCH 3/7] for #3 now we can extract experience section texts --- .../applicant/ParseApplicantExperience.java | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index 92f56d8..3ed3cd0 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -46,10 +46,33 @@ public List getApplicantExperience() { private WorkExperienceHelper[] findWorkExperience(int id, String line) { ParserHelper parser = new ParserHelper(); - logger.info(id + ": section indexes of this resume => "+parser.getIndexesOfSection(line).toString()); - logger.info(id + ": index of experience section in this =>"+parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)); - // copy texts starting from experience section index to the following section index - // experience index < following section index + + //logger.info(id + ": section indexes of this resume => "+parser.getIndexesOfSection(line).toString()); + //logger.info(id + ": index of experience section in this =>"+parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)); + + /* + * copy texts starting from experience section index to the following section index + * experience index is LESS THAN the following section index, therefore + * + * Example: + * section indexes [24, 355, 534, 669] + * index of experience section = 355 + * therefore, the following section index would be 534 + * we can get the texts that encompasses experience section + * by substring => (indexOfExperience, beginIndexOfFollowingSection) + * + */ + int indexOfExperience = parser.getIndexOfThisSection(RegEx.EXPERIENCE, line); + int nextSectionIndex = 0; // index that follows experience section + for (int index = 0; index < parser.getIndexesOfSection(line).size(); index++) { + if (parser.getIndexesOfSection(line).get(index) == indexOfExperience) { + nextSectionIndex = parser.getIndexesOfSection(line).get(index + 1); + break; + } + } + + logger.info(line.substring(indexOfExperience, nextSectionIndex)); + return null; } From 3b198cbe789c3a31601e3fc42ca726e3992bda7e Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Sun, 20 May 2018 13:01:19 -0400 Subject: [PATCH 4/7] unit test testWorkExperienceHelper, #3 work in progress --- .../applicant/ParseApplicantExperience.java | 18 +++--- .../cv/parser/entity/ApplicantExperience.java | 19 +++--- .../parser/helper/WorkExperienceHelper.java | 60 +++++++++++++++++-- src/test/java/com/cv/parser/AddressTest.java | 18 +++--- .../java/com/cv/parser/ApplicantTest.java | 37 ++++++++++-- 5 files changed, 112 insertions(+), 40 deletions(-) diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index 3ed3cd0..b6a3353 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -42,14 +42,10 @@ public void setApplicantExperience() { public List getApplicantExperience() { return applicantExperience; - } + } - private WorkExperienceHelper[] findWorkExperience(int id, String line) { - ParserHelper parser = new ParserHelper(); - - //logger.info(id + ": section indexes of this resume => "+parser.getIndexesOfSection(line).toString()); - //logger.info(id + ": index of experience section in this =>"+parser.getIndexOfThisSection(RegEx.EXPERIENCE, line)); - + private String findWorkExperience(int id, String line) { + ParserHelper parser = new ParserHelper(); /* * copy texts starting from experience section index to the following section index * experience index is LESS THAN the following section index, therefore @@ -70,10 +66,10 @@ private WorkExperienceHelper[] findWorkExperience(int id, String line) { break; } } - - logger.info(line.substring(indexOfExperience, nextSectionIndex)); - - return null; + // logger.info(line.substring(indexOfExperience, nextSectionIndex)); + String experienceText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); + // TODO call WorkExperienceHelper and parse the line + return experienceText.substring(indexOfExperience, nextSectionIndex); } } diff --git a/src/main/java/com/cv/parser/entity/ApplicantExperience.java b/src/main/java/com/cv/parser/entity/ApplicantExperience.java index e17d71c..20e92ee 100644 --- a/src/main/java/com/cv/parser/entity/ApplicantExperience.java +++ b/src/main/java/com/cv/parser/entity/ApplicantExperience.java @@ -1,9 +1,5 @@ package com.cv.parser.entity; -import java.util.Arrays; - -import com.cv.parser.helper.WorkExperienceHelper; - public class ApplicantExperience { public ApplicantExperience() { @@ -11,7 +7,9 @@ public ApplicantExperience() { } private int id; - private WorkExperienceHelper[] experience; // an applicant may have more than one experience + //private WorkExperienceHelper[] experience; // an applicant may have more than one experience + private String experience; // store it as string for now + // change to List only if managed to parse and store in WorkExperienceHelper public int getId() { return id; @@ -21,16 +19,19 @@ public void setId(int id) { this.id = id; } - public WorkExperienceHelper[] getExperience() { + public String getExperience() { return experience; } - public void setExperience(WorkExperienceHelper[] experience) { - this.experience = experience; + public void setExperience(String experienceData) { + // WorkExperienceHelper[] experienceArr; + // parse experience from experienceData + // store them in + this.experience = experienceData; } @Override public String toString() { - return "ApplicantExperience [id=" + id + ", experience=" + Arrays.toString(experience) + "]"; + return "ApplicantExperience [id=" + id + ", experience=" + experience + "]"; } } diff --git a/src/main/java/com/cv/parser/helper/WorkExperienceHelper.java b/src/main/java/com/cv/parser/helper/WorkExperienceHelper.java index e878171..725d7a6 100644 --- a/src/main/java/com/cv/parser/helper/WorkExperienceHelper.java +++ b/src/main/java/com/cv/parser/helper/WorkExperienceHelper.java @@ -16,12 +16,60 @@ public class WorkExperienceHelper { private String description; public WorkExperienceHelper() { - this.position = null; - this.company = null; - this.address = null; - this.dateFrom = null; - this.dateTo = null; - this.description = null; + } + + public String getPosition() { + return position; + } + + public void setPosition(String position) { + this.position = position; + } + + public String getCompany() { + return company; + } + + public void setCompany(String company) { + this.company = company; + } + + public String getAddress() { + return address; + } + + public void setAddress(String address) { + this.address = address; + } + + public String getDateFrom() { + return dateFrom; + } + + public void setDateFrom(String dateFrom) { + this.dateFrom = dateFrom; + } + + public String getDateTo() { + return dateTo; + } + + public void setDateTo(String dateTo) { + this.dateTo = dateTo; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + @Override + public String toString() { + return "WorkExperienceHelper [position=" + position + ", company=" + company + ", address=" + address + + ", dateFrom=" + dateFrom + ", dateTo=" + dateTo + ", description=" + description + "]"; } /* diff --git a/src/test/java/com/cv/parser/AddressTest.java b/src/test/java/com/cv/parser/AddressTest.java index b910601..5edb823 100644 --- a/src/test/java/com/cv/parser/AddressTest.java +++ b/src/test/java/com/cv/parser/AddressTest.java @@ -10,26 +10,26 @@ import junit.framework.TestCase; public class AddressTest extends TestCase { - Logger logger = LoggerFactory.getLogger(RegexTest.class); + Logger logger = LoggerFactory.getLogger(AddressTest.class); public void test() { Faker faker = new Faker(); - + String usAddress = faker.address().fullAddress(); logger.info(usAddress); - + String canadianAddress = "Surrey, BC"; logger.info(canadianAddress); - + ParserHelper parserHelper = new ParserHelper(); - + Map us = parserHelper.getUSstatesMap(); - for (Map.Entry usKV: us.entrySet()) { + for (Map.Entry usKV : us.entrySet()) { // use pattern and regular expression not indexOf } - + Map can = parserHelper.getCanadianProvincesMap(); - for (Map.Entry canKV: can.entrySet()) { - } + for (Map.Entry canKV : can.entrySet()) { + } } } diff --git a/src/test/java/com/cv/parser/ApplicantTest.java b/src/test/java/com/cv/parser/ApplicantTest.java index 45e9803..c2ae3dd 100644 --- a/src/test/java/com/cv/parser/ApplicantTest.java +++ b/src/test/java/com/cv/parser/ApplicantTest.java @@ -1,17 +1,44 @@ package com.cv.parser; +import java.util.ArrayList; +import java.util.List; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.cv.parser.applicant.DocumentDetails; -import com.cv.parser.entity.Applicant; +import com.cv.parser.helper.WorkExperienceHelper; +import com.github.javafaker.Faker; import junit.framework.TestCase; public class ApplicantTest extends TestCase { - Logger logger = LoggerFactory.getLogger(DocumentDetails.class); + Logger logger = LoggerFactory.getLogger(ApplicantTest.class); - public void test() { - //Applicant a = new Applicant(); + public void testWorkExperienceHelper() { + Faker faker = new Faker(); + WorkExperienceHelper weh = new WorkExperienceHelper(); + weh.setPosition(faker.company().profession()); + weh.setCompany(faker.company().name()); + weh.setAddress(faker.address().fullAddress()); + weh.setDateFrom(faker.date().birthday(18, 50).toString()); + weh.setDateTo(faker.date().birthday(18, 50).toString()); + weh.setDescription(faker.lorem().sentence()); + + WorkExperienceHelper weh2 = new WorkExperienceHelper(); + weh2.setPosition(faker.company().profession()); + weh2.setCompany(faker.company().name()); + weh2.setAddress(faker.address().fullAddress()); + weh2.setDateFrom(faker.date().birthday(18, 50).toString()); + weh2.setDateTo(faker.date().birthday(18, 50).toString()); + weh2.setDescription(faker.lorem().sentence()); + + + List weh3 = new ArrayList(); + weh3.add(weh); + weh3.add(weh2); + + logger.info(weh3.toString()); + } + } From de638eb87a63943f5d52e3548cda88cec7575a44 Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Mon, 21 May 2018 13:40:03 -0400 Subject: [PATCH 5/7] #2 parsing education in bulk (sentences), work in progress --- .classpath | 2 +- .settings/org.eclipse.jdt.core.prefs | 13 ++++++++--- pom.xml | 10 ++++----- src/main/java/com/cv/parser/RegEx.java | 6 +---- .../cv/parser/applicant/DocumentDetails.java | 16 +++++++++----- .../applicant/ParseApplicantEducation.java | 22 +++++++++++++++++-- .../applicant/ParseApplicantExperience.java | 9 ++------ .../cv/parser/entity/ApplicantExperience.java | 1 - .../com/cv/parser/extract/MSExtractor.java | 5 +---- .../com/cv/parser/extract/TXTExtractor.java | 3 --- .../com/cv/parser/helper/ParserHelper.java | 2 +- 11 files changed, 51 insertions(+), 38 deletions(-) diff --git a/.classpath b/.classpath index dbe94d2..fe213f3 100644 --- a/.classpath +++ b/.classpath @@ -12,7 +12,7 @@ - + diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs index abec6ca..6249222 100644 --- a/.settings/org.eclipse.jdt.core.prefs +++ b/.settings/org.eclipse.jdt.core.prefs @@ -1,5 +1,12 @@ eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 -org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.compiler.source=1.7 diff --git a/pom.xml b/pom.xml index c472c57..371da88 100644 --- a/pom.xml +++ b/pom.xml @@ -61,10 +61,10 @@ javafaker 0.15 - - com.googlecode.json-simple - json-simple - 1.1.1 - + + com.googlecode.json-simple + json-simple + 1.1.1 + diff --git a/src/main/java/com/cv/parser/RegEx.java b/src/main/java/com/cv/parser/RegEx.java index f4da097..463e54d 100644 --- a/src/main/java/com/cv/parser/RegEx.java +++ b/src/main/java/com/cv/parser/RegEx.java @@ -12,11 +12,7 @@ public enum RegEx { INTEREST ("\\b(Interest(s?)|INTEREST(S?)|Activity|Activities|ACTIVITIES|ACTIVITY)\\b"), MEMBERSHIP ("\\b(Membership(s?)|MEMBERSHIP(S?))\\b"), ADDITIONAL ("\\b(Award(s?)|AWARD(S)|Honor(s?)|HONOR(S?)|Certification(s?)|CERTIFICATION(S?)|Accomplishment(s?)|ACCOMPLISHMENT(S?)|Project(s?)|PROJECT(S?))\\b"), - DATEFROMTO ("([A-Za-z]+\\s)?([0-9]{4})\\s[-]\\s([A-Za-z]+\\s)?([0-9]{4})|([A-Za-z]*?\\sat\\s[A-Za-z]+)"); - - // TODO - // there is also SKILLS, LANGUAGES and - // ADDITIONAL that contains (awards, honors, projects, courses, certification) + DATEFROMTO ("([A-Za-z]+\\s)?([0-9]{4})\\s[-]\\s\\b((P|p)resent|(C|c)urrent)\\b|([A-Za-z]+\\s)?([0-9]{4})"); private final String name; diff --git a/src/main/java/com/cv/parser/applicant/DocumentDetails.java b/src/main/java/com/cv/parser/applicant/DocumentDetails.java index 5b23dae..74eeafa 100644 --- a/src/main/java/com/cv/parser/applicant/DocumentDetails.java +++ b/src/main/java/com/cv/parser/applicant/DocumentDetails.java @@ -12,6 +12,7 @@ import com.cv.parser.entity.Applicant; import com.cv.parser.entity.ApplicantDocument; +import com.cv.parser.entity.ApplicantEducation; import com.cv.parser.entity.ApplicantExperience; public class DocumentDetails { @@ -42,16 +43,19 @@ public void handleEvent(org.eclipse.swt.widgets.Event arg0) { //////////////////////////////////// - ParseApplicantExperience applicantExperience = new ParseApplicantExperience(appDocList); - applicantExperience.setApplicantExperience(); - for (ApplicantExperience ae : applicantExperience.getApplicantExperience()) { - logger.info(ae.toString()); + ParseApplicantExperience parseApplicantExperience = new ParseApplicantExperience(appDocList); + parseApplicantExperience.setApplicantExperience(); + for (ApplicantExperience applicantExperience : parseApplicantExperience.getApplicantExperience()) { + logger.info(applicantExperience.toString()); } + ParseApplicantEducation parseApplicantEducation = new ParseApplicantEducation(appDocList); + parseApplicantEducation.setApplicantEducation(); + for (ApplicantEducation applicantEducation : parseApplicantEducation.getApplicantEducation()) { + logger.info(applicantEducation.toString()); + } - //FetchApplicantExperience applicationExperience = new FetchApplicantExperience(appDocList); //FetchApplicantSkill applicantSkill = new FetchApplicantSkill(appDocList); - // insert application + applicationEducation + applicationExperience + applicantSkill in the database } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java index c0364e9..4a020eb 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java @@ -1,8 +1,14 @@ package com.cv.parser.applicant; +import java.util.ArrayList; +import java.util.List; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cv.parser.entity.ApplicantDocument; +import com.cv.parser.entity.ApplicantEducation; + /** * This is for storing data in {@link} ApplicantEducation object|entity; * @@ -11,8 +17,20 @@ */ public class ParseApplicantEducation { + Logger logger = LoggerFactory.getLogger(ParseApplicantEducation.class); - - + List appDocList = new ArrayList<>(); + + public ParseApplicantEducation(List appDocList) { + this.appDocList = appDocList; + } + + public void setApplicantEducation() { + + } + + public List getApplicantEducation() { + return null; + } } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index b6a3353..a0dc60a 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -2,9 +2,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -12,7 +9,6 @@ import com.cv.parser.entity.ApplicantDocument; import com.cv.parser.entity.ApplicantExperience; import com.cv.parser.helper.ParserHelper; -import com.cv.parser.helper.WorkExperienceHelper; /** * Fetches data to be stored in {@link ApplicantExperience} @@ -66,10 +62,9 @@ private String findWorkExperience(int id, String line) { break; } } - // logger.info(line.substring(indexOfExperience, nextSectionIndex)); String experienceText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); - // TODO call WorkExperienceHelper and parse the line return experienceText.substring(indexOfExperience, nextSectionIndex); } - + + } diff --git a/src/main/java/com/cv/parser/entity/ApplicantExperience.java b/src/main/java/com/cv/parser/entity/ApplicantExperience.java index 20e92ee..e4de6bf 100644 --- a/src/main/java/com/cv/parser/entity/ApplicantExperience.java +++ b/src/main/java/com/cv/parser/entity/ApplicantExperience.java @@ -7,7 +7,6 @@ public ApplicantExperience() { } private int id; - //private WorkExperienceHelper[] experience; // an applicant may have more than one experience private String experience; // store it as string for now // change to List only if managed to parse and store in WorkExperienceHelper diff --git a/src/main/java/com/cv/parser/extract/MSExtractor.java b/src/main/java/com/cv/parser/extract/MSExtractor.java index 46cd045..8f4a38b 100644 --- a/src/main/java/com/cv/parser/extract/MSExtractor.java +++ b/src/main/java/com/cv/parser/extract/MSExtractor.java @@ -2,7 +2,6 @@ import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -65,9 +64,7 @@ public void extractFiles() { msDoc = new XWPFDocument(fs); we = new XWPFWordExtractor(msDoc); this.contents.add(we.getText()); - } catch (FileNotFoundException e) { - logger.error(e.getMessage()); - } catch (IOException e) { + } catch (IOException | NullPointerException e) { logger.error(e.getMessage()); } finally { try { diff --git a/src/main/java/com/cv/parser/extract/TXTExtractor.java b/src/main/java/com/cv/parser/extract/TXTExtractor.java index 8209228..8142535 100644 --- a/src/main/java/com/cv/parser/extract/TXTExtractor.java +++ b/src/main/java/com/cv/parser/extract/TXTExtractor.java @@ -2,7 +2,6 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; @@ -52,8 +51,6 @@ public void extractFiles() { } this.contents.add(String.join(" ", content)); br.close(); - } catch (FileNotFoundException e) { - logger.error(e.getMessage()); } catch (IOException e) { logger.error(e.getMessage()); } diff --git a/src/main/java/com/cv/parser/helper/ParserHelper.java b/src/main/java/com/cv/parser/helper/ParserHelper.java index c4baa41..adf06c8 100644 --- a/src/main/java/com/cv/parser/helper/ParserHelper.java +++ b/src/main/java/com/cv/parser/helper/ParserHelper.java @@ -94,7 +94,7 @@ public List getIndexesOfSection(RegEx regEx, String line) { Collections.sort(indexesOfSection); return indexesOfSection; } - + /** * Read JSON file in resources folder and return it as a Map * From 9e4deadddba912e8ab078f759e90228e65878919 Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Wed, 23 May 2018 12:03:43 -0400 Subject: [PATCH 6/7] #2 parsing education by section (not too specific, i.e. school, degree) --- public/resume.txt | 1 - src/main/java/com/cv/parser/RegEx.java | 8 ++-- .../applicant/ParseApplicantEducation.java | 44 +++++++++++++++---- .../applicant/ParseApplicantExperience.java | 28 ++++++------ .../cv/parser/entity/ApplicantEducation.java | 28 ++++++++++++ 5 files changed, 81 insertions(+), 28 deletions(-) diff --git a/public/resume.txt b/public/resume.txt index 20a6a4d..e991474 100644 --- a/public/resume.txt +++ b/public/resume.txt @@ -63,7 +63,6 @@ Belarus, Minsk. February 2012 - September 2014 Real Estate Agency Assistant heals LLC, Full stack PHP Developer Belarus, Minsk, http://www.a-h.by; May 2011 - January 2012 -google.com Dynamic website design and programming using PHP, MySQL, HTML, CSS. Setup and administration of web servers and server software. Business consulting of securing/ planning project. Development to online marketing, search engine placement and promotion (http://www.mogu.by; http://www.a-h.by). diff --git a/src/main/java/com/cv/parser/RegEx.java b/src/main/java/com/cv/parser/RegEx.java index 463e54d..d0a58fc 100644 --- a/src/main/java/com/cv/parser/RegEx.java +++ b/src/main/java/com/cv/parser/RegEx.java @@ -4,12 +4,12 @@ public enum RegEx { LINK ("(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)"), EMAIL ("[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+"), PHONE ("\\(?([0-9]{3})\\)?[-. ]([0-9]{3})[-. ]?[-. ]?([0-9]{4})"), - OBJECTIVE ("\\b(Objective|Objectives|OBJECTIVE|OBJECTIVES)([^-!@#$%^&*()+.,?])\\b"), - EDUCATION ("\\b(Education|Educations|EDUCATION|EDUCATIONS)\\b"), + OBJECTIVE ("\\b(Objective(s?)|OBJECTIVE(S?)|Summary|SUMMARY)([^-!@#$%^&*()+.,?])\\b"), + EDUCATION ("\\b(Education(s?)|EDUCATION(S?))\\b"), EXPERIENCE ("\\b(Experience(s?)|EXPERIENCE(S?))\\b"), - SKILLS ("\\b(Skill(s?)|SKILL(S?)|Expertise(s?)|Skills\\s&?\\sExpertises)\\b"), + SKILLS ("\\b(Skill(s?)|SKILL(S?)|Expertise(s?)|Skills\\s&?\\sExpertises|Tools\\s&\\sTechnologies)\\b"), LANGUAGE ("\\b(Language(s?)|LANGUAGE(S?))\\b"), - INTEREST ("\\b(Interest(s?)|INTEREST(S?)|Activity|Activities|ACTIVITIES|ACTIVITY)\\b"), + INTEREST ("\\b(Interest(s?)|INTEREST(S?)|Activity|Activities|ACTIVITY|ACTIVITIES)\\b"), MEMBERSHIP ("\\b(Membership(s?)|MEMBERSHIP(S?))\\b"), ADDITIONAL ("\\b(Award(s?)|AWARD(S)|Honor(s?)|HONOR(S?)|Certification(s?)|CERTIFICATION(S?)|Accomplishment(s?)|ACCOMPLISHMENT(S?)|Project(s?)|PROJECT(S?))\\b"), DATEFROMTO ("([A-Za-z]+\\s)?([0-9]{4})\\s[-]\\s\\b((P|p)resent|(C|c)urrent)\\b|([A-Za-z]+\\s)?([0-9]{4})"); diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java index 4a020eb..564d8ab 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java @@ -6,8 +6,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cv.parser.RegEx; import com.cv.parser.entity.ApplicantDocument; import com.cv.parser.entity.ApplicantEducation; +import com.cv.parser.helper.ParserHelper; /** * This is for storing data in {@link} ApplicantEducation object|entity; @@ -17,20 +19,46 @@ */ public class ParseApplicantEducation { - Logger logger = LoggerFactory.getLogger(ParseApplicantEducation.class); - List appDocList = new ArrayList<>(); + List applicantDocument = new ArrayList<>(); + List applicantEducationList = new ArrayList<>(); - public ParseApplicantEducation(List appDocList) { - this.appDocList = appDocList; + public ParseApplicantEducation(List applicantDocument) { + this.applicantDocument = applicantDocument; } public void setApplicantEducation() { - + for (ApplicantDocument ad : applicantDocument) { + ApplicantEducation applicantEducation = new ApplicantEducation(); + applicantEducation.setId(ad.getId()); + applicantEducation.setEducation(findEducations(ad.getLine())); + this.applicantEducationList.add(applicantEducation); + } } - + public List getApplicantEducation() { - return null; - } + return applicantEducationList; + } + + private String findEducations(String line) { + ParserHelper parser = new ParserHelper(); + // just like findWorkExperiences + int indexOfEducation = parser.getIndexOfThisSection(RegEx.EDUCATION, line); + int nextSectionIndex = 0; + List listOfSectionIndexes = parser.getIndexesOfSection(line); + String educationsText = line.replaceFirst(RegEx.EDUCATION.toString(), ""); + for (int index = 0; index < listOfSectionIndexes.size(); index++) { + if (listOfSectionIndexes.get(index) == indexOfEducation) { + // if education is the last section, then there is no nextSectionIndex + if (index == listOfSectionIndexes.size() - 1) { + return educationsText.substring(listOfSectionIndexes.get(index)); + } else { + nextSectionIndex = listOfSectionIndexes.get(index + 1); + break; + } + } + } + return educationsText.substring(indexOfEducation, nextSectionIndex); + } } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index a0dc60a..01e513f 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -20,27 +20,27 @@ public class ParseApplicantExperience { Logger logger = LoggerFactory.getLogger(ParseApplicantExperience.class); - List appDocList = new ArrayList(); - List applicantExperience = new ArrayList(); + List applicantDocument = new ArrayList<>(); + List applicantExperienceList = new ArrayList<>(); - public ParseApplicantExperience(List appDocList) { - this.appDocList = appDocList; + public ParseApplicantExperience(List applicantDocument) { + this.applicantDocument = applicantDocument; } public void setApplicantExperience() { - for (ApplicantDocument ad : appDocList) { - ApplicantExperience ae = new ApplicantExperience(); - ae.setId(ad.getId()); - ae.setExperience(findWorkExperience(ad.getId(), ad.getLine())); - this.applicantExperience.add(ae); + for (ApplicantDocument ad : applicantDocument) { + ApplicantExperience applicantExperience = new ApplicantExperience(); + applicantExperience.setId(ad.getId()); + applicantExperience.setExperience(findWorkExperiences(ad.getId(), ad.getLine())); + this.applicantExperienceList.add(applicantExperience); } } public List getApplicantExperience() { - return applicantExperience; + return applicantExperienceList; } - private String findWorkExperience(int id, String line) { + private String findWorkExperiences(int id, String line) { ParserHelper parser = new ParserHelper(); /* * copy texts starting from experience section index to the following section index @@ -62,9 +62,7 @@ private String findWorkExperience(int id, String line) { break; } } - String experienceText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); - return experienceText.substring(indexOfExperience, nextSectionIndex); + String experiencesText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); + return experiencesText.substring(indexOfExperience, nextSectionIndex); } - - } diff --git a/src/main/java/com/cv/parser/entity/ApplicantEducation.java b/src/main/java/com/cv/parser/entity/ApplicantEducation.java index 2b13ad2..1e54607 100644 --- a/src/main/java/com/cv/parser/entity/ApplicantEducation.java +++ b/src/main/java/com/cv/parser/entity/ApplicantEducation.java @@ -2,4 +2,32 @@ public class ApplicantEducation { + public ApplicantEducation() { + + } + + private int id; + // store educations as String for now + private String education; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public String getEducation() { + return education; + } + + public void setEducation(String education) { + this.education = education; + } + + @Override + public String toString() { + return "ApplicantEducation [id=" + id + ", education=" + education + "]"; + } } From a4c91a625f85f5a0216aa996aabf7f8f635e79ec Mon Sep 17 00:00:00 2001 From: Raymart De Guzman Date: Wed, 23 May 2018 18:56:34 -0400 Subject: [PATCH 7/7] #7 parsing skills --- src/main/java/com/cv/parser/RegEx.java | 9 ++- .../cv/parser/applicant/DocumentDetails.java | 24 +++++--- .../applicant/ParseApplicantEducation.java | 30 +++++---- .../applicant/ParseApplicantExperience.java | 50 ++++++++------- .../parser/applicant/ParseApplicantSkill.java | 61 +++++++++++++++++++ .../com/cv/parser/entity/ApplicantSkill.java | 33 ++++++++++ 6 files changed, 161 insertions(+), 46 deletions(-) create mode 100644 src/main/java/com/cv/parser/applicant/ParseApplicantSkill.java create mode 100644 src/main/java/com/cv/parser/entity/ApplicantSkill.java diff --git a/src/main/java/com/cv/parser/RegEx.java b/src/main/java/com/cv/parser/RegEx.java index d0a58fc..a181e76 100644 --- a/src/main/java/com/cv/parser/RegEx.java +++ b/src/main/java/com/cv/parser/RegEx.java @@ -4,16 +4,21 @@ public enum RegEx { LINK ("(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)"), EMAIL ("[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+"), PHONE ("\\(?([0-9]{3})\\)?[-. ]([0-9]{3})[-. ]?[-. ]?([0-9]{4})"), - OBJECTIVE ("\\b(Objective(s?)|OBJECTIVE(S?)|Summary|SUMMARY)([^-!@#$%^&*()+.,?])\\b"), + OBJECTIVE ("\\b(Objective(s?)|OBJECTIVE(S?)|Summary|SUMMARY)([^-!@#$%^&*()+.,?])\\b"), // summary included here EDUCATION ("\\b(Education(s?)|EDUCATION(S?))\\b"), EXPERIENCE ("\\b(Experience(s?)|EXPERIENCE(S?))\\b"), - SKILLS ("\\b(Skill(s?)|SKILL(S?)|Expertise(s?)|Skills\\s&?\\sExpertises|Tools\\s&\\sTechnologies)\\b"), + SKILLS ("\\b(Skill(s?) & Expertise(s?)|Tool(s?) & Technolog(y?|ies?)|Skill(s?)|SKILL(S?))\\b"), LANGUAGE ("\\b(Language(s?)|LANGUAGE(S?))\\b"), INTEREST ("\\b(Interest(s?)|INTEREST(S?)|Activity|Activities|ACTIVITY|ACTIVITIES)\\b"), MEMBERSHIP ("\\b(Membership(s?)|MEMBERSHIP(S?))\\b"), ADDITIONAL ("\\b(Award(s?)|AWARD(S)|Honor(s?)|HONOR(S?)|Certification(s?)|CERTIFICATION(S?)|Accomplishment(s?)|ACCOMPLISHMENT(S?)|Project(s?)|PROJECT(S?))\\b"), DATEFROMTO ("([A-Za-z]+\\s)?([0-9]{4})\\s[-]\\s\\b((P|p)resent|(C|c)urrent)\\b|([A-Za-z]+\\s)?([0-9]{4})"); + /** + * Note: + * - if you have a combination of words make sure you put them in the beginning of the list + */ + private final String name; private RegEx(String name) { diff --git a/src/main/java/com/cv/parser/applicant/DocumentDetails.java b/src/main/java/com/cv/parser/applicant/DocumentDetails.java index 74eeafa..f656081 100644 --- a/src/main/java/com/cv/parser/applicant/DocumentDetails.java +++ b/src/main/java/com/cv/parser/applicant/DocumentDetails.java @@ -14,14 +14,15 @@ import com.cv.parser.entity.ApplicantDocument; import com.cv.parser.entity.ApplicantEducation; import com.cv.parser.entity.ApplicantExperience; +import com.cv.parser.entity.ApplicantSkill; public class DocumentDetails { Logger logger = LoggerFactory.getLogger(DocumentDetails.class); // toString(): ApplicantDocument [id={number}, details={resume details......}] - List appDocList = new ArrayList(); + List applicantDocumentList = new ArrayList<>(); - List superList = new ArrayList(); + List superList = new ArrayList<>(); Button btnSaveDocumentsToDb; public DocumentDetails(Button btnSaveDocumentsToDb, List superList) { @@ -35,7 +36,7 @@ public void handleEvent(org.eclipse.swt.widgets.Event arg0) { storeDocumentAsString(); - ParseApplicant application = new ParseApplicant(appDocList); + ParseApplicant application = new ParseApplicant(applicantDocumentList); application.setApplicantInfo(); for (Applicant a : application.getApplicants()) { logger.info(a.toString()); @@ -43,20 +44,23 @@ public void handleEvent(org.eclipse.swt.widgets.Event arg0) { //////////////////////////////////// - ParseApplicantExperience parseApplicantExperience = new ParseApplicantExperience(appDocList); - parseApplicantExperience.setApplicantExperience(); + ParseApplicantExperience parseApplicantExperience = new ParseApplicantExperience(applicantDocumentList); + parseApplicantExperience.setApplicantExperiences(); for (ApplicantExperience applicantExperience : parseApplicantExperience.getApplicantExperience()) { logger.info(applicantExperience.toString()); } - ParseApplicantEducation parseApplicantEducation = new ParseApplicantEducation(appDocList); - parseApplicantEducation.setApplicantEducation(); + ParseApplicantEducation parseApplicantEducation = new ParseApplicantEducation(applicantDocumentList); + parseApplicantEducation.setApplicantEducations(); for (ApplicantEducation applicantEducation : parseApplicantEducation.getApplicantEducation()) { logger.info(applicantEducation.toString()); } - //FetchApplicantSkill applicantSkill = new FetchApplicantSkill(appDocList); - // insert application + applicationEducation + applicationExperience + applicantSkill in the database + ParseApplicantSkill parseApplicantSkill = new ParseApplicantSkill(applicantDocumentList); + parseApplicantSkill.setApplicantSkills(); + for (ApplicantSkill applicantSkill : parseApplicantSkill.getApplicantSkillList()) { + logger.info(applicantSkill.toString()); + } } }); @@ -67,7 +71,7 @@ private void storeDocumentAsString() { for (int index = 0; index < superList.size(); index++) { String details = superList.get(index); String normalize = StringUtils.normalizeSpace(details); // i.e. hello world -> hello world - this.appDocList.add(new ApplicantDocument((index + 1), normalize)); + this.applicantDocumentList.add(new ApplicantDocument((index + 1), normalize)); } } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java index 564d8ab..b5f25f7 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantEducation.java @@ -28,7 +28,7 @@ public ParseApplicantEducation(List applicantDocument) { this.applicantDocument = applicantDocument; } - public void setApplicantEducation() { + public void setApplicantEducations() { for (ApplicantDocument ad : applicantDocument) { ApplicantEducation applicantEducation = new ApplicantEducation(); applicantEducation.setId(ad.getId()); @@ -45,20 +45,24 @@ private String findEducations(String line) { ParserHelper parser = new ParserHelper(); // just like findWorkExperiences int indexOfEducation = parser.getIndexOfThisSection(RegEx.EDUCATION, line); - int nextSectionIndex = 0; - List listOfSectionIndexes = parser.getIndexesOfSection(line); - String educationsText = line.replaceFirst(RegEx.EDUCATION.toString(), ""); - for (int index = 0; index < listOfSectionIndexes.size(); index++) { - if (listOfSectionIndexes.get(index) == indexOfEducation) { - // if education is the last section, then there is no nextSectionIndex - if (index == listOfSectionIndexes.size() - 1) { - return educationsText.substring(listOfSectionIndexes.get(index)); - } else { - nextSectionIndex = listOfSectionIndexes.get(index + 1); - break; + if (indexOfEducation != -1) { + int nextSectionIndex = 0; + List listOfSectionIndexes = parser.getIndexesOfSection(line); + String educationsText = line.replaceFirst(RegEx.EDUCATION.toString(), ""); + for (int index = 0; index < listOfSectionIndexes.size(); index++) { + if (listOfSectionIndexes.get(index) == indexOfEducation) { + // if education is the last section, then there is no + // nextSectionIndex + if (index == listOfSectionIndexes.size() - 1) { + return educationsText.substring(indexOfEducation); + } else { + nextSectionIndex = listOfSectionIndexes.get(index + 1); + break; + } } } + return educationsText.substring(indexOfEducation, nextSectionIndex); } - return educationsText.substring(indexOfEducation, nextSectionIndex); + return null; } } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java index 01e513f..2572749 100644 --- a/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantExperience.java @@ -27,7 +27,7 @@ public ParseApplicantExperience(List applicantDocument) { this.applicantDocument = applicantDocument; } - public void setApplicantExperience() { + public void setApplicantExperiences() { for (ApplicantDocument ad : applicantDocument) { ApplicantExperience applicantExperience = new ApplicantExperience(); applicantExperience.setId(ad.getId()); @@ -38,31 +38,39 @@ public void setApplicantExperience() { public List getApplicantExperience() { return applicantExperienceList; - } + } private String findWorkExperiences(int id, String line) { - ParserHelper parser = new ParserHelper(); - /* - * copy texts starting from experience section index to the following section index - * experience index is LESS THAN the following section index, therefore + ParserHelper parser = new ParserHelper(); + /* + * copy texts starting from experience section index to the following + * section index experience index is LESS THAN the following section + * index, therefore + * + * Example: section indexes [24, 355, 534, 669] index of experience + * section = 355 therefore, the following section index would be 534 we + * can get the texts that encompasses experience section by substring => + * (indexOfExperience, beginIndexOfFollowingSection) * - * Example: - * section indexes [24, 355, 534, 669] - * index of experience section = 355 - * therefore, the following section index would be 534 - * we can get the texts that encompasses experience section - * by substring => (indexOfExperience, beginIndexOfFollowingSection) - * */ int indexOfExperience = parser.getIndexOfThisSection(RegEx.EXPERIENCE, line); - int nextSectionIndex = 0; // index that follows experience section - for (int index = 0; index < parser.getIndexesOfSection(line).size(); index++) { - if (parser.getIndexesOfSection(line).get(index) == indexOfExperience) { - nextSectionIndex = parser.getIndexesOfSection(line).get(index + 1); - break; + if (indexOfExperience != -1) { + int nextSectionIndex = 0; // index that follows experience section + String experiencesText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); + for (int index = 0; index < parser.getIndexesOfSection(line).size(); index++) { + if (parser.getIndexesOfSection(line).get(index) == indexOfExperience) { + // experience section is not always in the middle + // rarely they may appear as the last section + if (index == parser.getIndexesOfSection(line).size() - 1) { + return experiencesText.substring(indexOfExperience); + } else { + nextSectionIndex = parser.getIndexesOfSection(line).get(index + 1); + break; + } + } } - } - String experiencesText = line.replaceFirst(RegEx.EXPERIENCE.toString(), ""); - return experiencesText.substring(indexOfExperience, nextSectionIndex); + return experiencesText.substring(indexOfExperience, nextSectionIndex); + } + return null; } } diff --git a/src/main/java/com/cv/parser/applicant/ParseApplicantSkill.java b/src/main/java/com/cv/parser/applicant/ParseApplicantSkill.java new file mode 100644 index 0000000..3d74272 --- /dev/null +++ b/src/main/java/com/cv/parser/applicant/ParseApplicantSkill.java @@ -0,0 +1,61 @@ +package com.cv.parser.applicant; + +import java.util.ArrayList; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.cv.parser.RegEx; +import com.cv.parser.entity.ApplicantDocument; +import com.cv.parser.entity.ApplicantSkill; +import com.cv.parser.helper.ParserHelper; + +public class ParseApplicantSkill { + + Logger logger = LoggerFactory.getLogger(ParseApplicantSkill.class); + + List applicantDocumentList = new ArrayList<>(); + List applicantSkillList = new ArrayList<>(); + + public ParseApplicantSkill(List applicantDocumentList) { + this.applicantDocumentList = applicantDocumentList; + } + + public List getApplicantSkillList() { + return applicantSkillList; + } + + public void setApplicantSkills() { + for (ApplicantDocument applicantDocument : applicantDocumentList) { + ApplicantSkill applicantSkill = new ApplicantSkill(); + applicantSkill.setId(applicantDocument.getId()); + applicantSkill.setSkills(findApplicantSkills(applicantDocument.getLine())); + this.applicantSkillList.add(applicantSkill); + } + } + + private String findApplicantSkills(String line) { + ParserHelper parser = new ParserHelper(); + int indexOfSkillsSection = parser.getIndexOfThisSection(RegEx.SKILLS, line); + + if (indexOfSkillsSection != -1) { + List sectionIndexes = parser.getIndexesOfSection(line); + String skillsText = line.replaceFirst(RegEx.SKILLS.toString(), ""); + int nextSectionIndex = 0; + for (int index = 0; index < sectionIndexes.size(); index++) { + if (sectionIndexes.get(index) == indexOfSkillsSection) { + if (index == sectionIndexes.size() - 1) { + return skillsText.substring(indexOfSkillsSection); + } else { + nextSectionIndex = sectionIndexes.get(index + 1); + break; + } + } + } + return skillsText.substring(indexOfSkillsSection, nextSectionIndex); + } + return null; + } + +} diff --git a/src/main/java/com/cv/parser/entity/ApplicantSkill.java b/src/main/java/com/cv/parser/entity/ApplicantSkill.java new file mode 100644 index 0000000..5510064 --- /dev/null +++ b/src/main/java/com/cv/parser/entity/ApplicantSkill.java @@ -0,0 +1,33 @@ +package com.cv.parser.entity; + +public class ApplicantSkill { + + public ApplicantSkill() { + + } + + private int id; + private String skills; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public String getSkills() { + return skills; + } + + public void setSkills(String skills) { + this.skills = skills; + } + + @Override + public String toString() { + return "ApplicantSkill [id=" + id + ", skills=" + skills + "]"; + } + +}