Skip to content

Commit

Permalink
Bug fixes + added new code
Browse files Browse the repository at this point in the history
Read readme.md file for more info
  • Loading branch information
DrakenWan committed Dec 29, 2022
1 parent 7fb472b commit 46796fe
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 49 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ However, the server sided script can still be implemented to this day. I recomme
Section Name | Can Extract? | Clean? | Deepscan Extraction?
:----------------- | :----------------- | :----------------- | :------------------
*profile data* | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark:
*experience section*| :x: | :x: | :heavy_check_mark:
*experience section*| :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark:
*education section*| :x: | :x: | :x:
*certifications* | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark:
*volunteer experience*| :x: | :x: | :x:
Expand All @@ -37,6 +37,13 @@ I strive to make the code as general as possible but the extractor tool may not

I will keep posting dated updates here. In the future, I will move them somewhere else if I have time.

#### Update(dated: 29th December, 2022)

- I have fixed the extraction of the `experience section`. It works correctly on the 30 LinkedIn profile pages I verified it against.
- I have added a `clear text` button to clear textbox content.
- Replaced the old, redundant code with new code, or deleted it entirely.
- I will start writing code to scrape the sections that are still left.

#### Update(dated: 27th December, 2022)

Apparently, the HTML code for the `experience section` has changed slightly. Even a slight change is a big deal, since the experience section was the hardest one for me to generalize, and it will take time to correct the extractor for it. The deepscan extraction still works for this section, since the HTML document is the same for all of the new pages and has not changed.
Expand Down
6 changes: 6 additions & 0 deletions css/content.css
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@
transform: translateY(4px);
}

/*
 * Layout for slider buttons carrying the `sticky_buttons` class
 * (currently the "Clear Text?" button injected by scripts/content.js).
 * NOTE(review): `position: sticky` behaves like `relative` until an inset
 * such as `top` is set -- confirm the intended offset and add it.
 */
.sticky_buttons {
  position: sticky;
  display: block;
  font-size: 16px;
  margin: 15px 25px 0 15px;
}

#deepscan {
padding: 10px;
Expand Down
89 changes: 41 additions & 48 deletions scripts/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@ function main() {

///////// VARIABLES //////////
// define any VARIABLES below here
var data = {};



// Edit this string to edit the slider popup
/*
NOTE: Unable to access the slider.html file even after using troubleshoot
steps from google. So I am using the content script to inject the
slider file into the webpage.
*/
// (appears on clicking extension
// icon)
var sliderInnerHTMLString = "\
Expand All @@ -35,6 +40,8 @@ function main() {
<div id='deepscancontainer'>\
<label id='deepscanlabel' for='deepscan'>Deepscan?<input type='checkbox' name='deepscan' id='deepscan' value='deepscan'/></label>\
</div>\
<div class='internal_button sticky_buttons' id='clear_text_button'>Clear Text?</div>\
<br/>\
</div>\
<br/>\
\
Expand All @@ -43,7 +50,8 @@ function main() {
<div id='sbodycontainer'>\
<br/>\
<br/>\
<textarea id='objectvalue'></textarea>\
<span style='font-size: 10px'><i>This textbox extracts if you scroll.</i></small>\
<textarea id='basicprofile'></textarea>\
<br/>\
<h2> Education Section </h2>\
<br/>\
Expand Down Expand Up @@ -80,78 +88,63 @@ function main() {

//////////VARIABLES END///////////

//expand page sections
//expandButtons();

//generate the DOM nodes below


// generate the DOM nodes below //

sliderGen(sliderInnerHTMLString);

//DOM node generators above//
// DOM node generators above //


//listener to trigger action - which is to push in/out
//the slider
//the slider toggle works with the service_worker file
chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
if(msg.todo == "toggle") {
slider();
}
});



//Added this as a temporary solution
//Issue: The page doesn't fully load and content script
// runs only once
//Resolution: Added trigger through window.onscroll
// function to register extraction everytime
// a user scrolls on the webpage.

data = extract();
var bodycontainer = document.getElementById("slider").querySelector("#sbodycontainer");
bodycontainer = bodycontainer.querySelector("#objectvalue")
bodycontainer.value = JSON.stringify(data)

bodycontainer = document.getElementById("slider").querySelector("#sheaderheader");
var uname = document?.querySelector('div.pv-text-details__left-panel > div > h1') || null;
uname = uname?.textContent || "";
bodycontainer.innerHTML = "<h1>"+uname+"</h1>";
window.onscroll = function() {
data = extract();
//alert(JSON.stringify(data));
var bodycontainer = document.getElementById("slider").querySelector("#sbodycontainer");
bodycontainer = bodycontainer.querySelector("#objectvalue")
bodycontainer.value = JSON.stringify(data)

bodycontainer = document.getElementById("slider").querySelector("#sheaderheader");
var uname = document?.querySelector('div.pv-text-details__left-panel > div > h1') || null;
uname = uname?.textContent || "";
bodycontainer.innerHTML = "<h1>"+uname+"</h1>";
}

//run savePDF option
document.getElementById('savepdf').addEventListener("click", savePDF);

//Clear text button action
document.getElementById('clear_text_button').addEventListener("click", function() {
var ids = ['basicprofile', 'educationtext', 'experiencetext', 'skillstext', 'certificationstext' ];
for(var i=0; i<ids.length; i++) {
document.getElementById(ids[i]).value = "";
}
});


window.onmousemove = function () {
// any heavyduty function added here might create an overhead or slowing down.
printName();
}

window.onscroll = function () {
printName();
document.getElementById('basicprofile').value = JSON.stringify(extract());
}

//deploying listeners for `manual extraction` buttons feature
document.getElementById('certification_extract_button').addEventListener("click", extractCert);
document.getElementById('skills_extract_button').addEventListener("click", extractSkills);
document.getElementById('experience_extract_button').addEventListener("click", extractExperience);
document.getElementById('education_extract_button').addEventListener("click", extractEducation);
}
} //MAIN FUNCTION ENDS HERE //


//*=======================================================*//


//extract btn generator
// function extractBtnGen() {
// var extractBtn = document.createElement("div");
// extractBtn.textContent = "Toggle Frame";
// extractBtn.id = "extractBtn";
// document.querySelector("#global-nav").append(extractBtn)
// }
/**
 * Writes the profile owner's name into the slider header (`#sheaderheader`).
 *
 * The name is read from the profile page's top-card heading, falling back to
 * the first `artdeco-entity-lockup__title ember-view` element; when neither
 * exists an empty string is used. The text is passed through `getCleanText`
 * (defined elsewhere in this file) before it is displayed.
 *
 * The name comes from page content, so it is inserted as plain text through
 * `textContent` rather than concatenated into `innerHTML`; markup inside a
 * name can therefore never be parsed as HTML.
 *
 * Does nothing when the slider or its header element is not in the DOM, so
 * the mousemove/scroll handlers that call this do not throw repeatedly.
 */
function printName() {
  const nameNode =
    document.querySelector('div.pv-text-details__left-panel > div > h1') ||
    document.getElementsByClassName('artdeco-entity-lockup__title ember-view')[0] ||
    null;
  const uname = getCleanText(nameNode?.textContent || "");

  const header = document.getElementById('slider')?.querySelector('#sheaderheader');
  if (!header) {
    return;
  }

  // Build the heading as a node so the name is treated as text, never markup.
  const heading = document.createElement('h1');
  heading.textContent = uname;
  header.replaceChildren(heading);
}

//slider window element generator
function sliderGen(sliderInnerHTMLString) {
Expand Down Expand Up @@ -572,7 +565,7 @@ function extractExperience() {
roles = [];


var elem = list[i].querySelector('div > div').firstElementChild.nextElementSibling; //for anchor 1
var elem = list[i].querySelector('div > div').nextElementSibling; //for anchor 1
if(elem.querySelector('div > a')) {
// condition for multiple roles in same company
company = elem.querySelector('div > a > div > span > span')?.textContent || "";
Expand All @@ -585,7 +578,7 @@ function extractExperience() {
// traversing roles list in a company

var keke = elems[j].querySelector("div > div")?.nextElementSibling || null;
keke = keke.querySelector('div > a')
keke = keke?.querySelector('div > a') || null;

kchilds = keke.children;
var rname=" ", startDate=" ", endDate=" ", loc=" ";
Expand Down

0 comments on commit 46796fe

Please sign in to comment.