-
Notifications
You must be signed in to change notification settings - Fork 11
/
linkedin_intern_scraper.js
119 lines (110 loc) · 2.97 KB
/
linkedin_intern_scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
const { events, LinkedinScraper } = require("linkedin-jobs-scraper");
/**
 * Script entry point: runs several LinkedIn job searches for software
 * intern / co-op roles in Canada, collects every job emitted by the scraper,
 * and writes the collected jobs plus a timestamp to
 * src/linkedin_intern_output.json.
 *
 * Output shape: {"time": "<locale date string>", "data": [ ...job records ]}
 *
 * Failures while writing the file are logged instead of being ignored, the
 * browser is closed even when a search rejects, and any uncaught failure is
 * logged and reflected in the process exit code.
 */
(async () => {
  const OUTPUT_PATH = "src/linkedin_intern_output.json";

  // Programmatically disable the scraper's logger five seconds after start.
  setTimeout(() => LinkedinScraper.disableLogger(), 5000);

  // Each scraper instance is associated with one browser.
  // Concurrent queries are run on different pages within the same browser instance.
  const scraper = new LinkedinScraper({
    headless: true,
    slowMo: 100 // used to be 10
  });

  // Every job record received so far, in arrival order.
  const jobs = [];

  // Collect each scraped job. Key order is kept stable because it determines
  // the key order in the output file. `senorityLevel` (sic) is the property
  // name this script reads from the event payload and writes to the output,
  // so the spelling must not be "corrected" here.
  scraper.on(
    events.custom.data,
    ({
      query,
      location,
      link,
      title,
      company,
      place,
      date,
      senorityLevel,
      jobFunction,
      employmentType,
      industries
    }) => {
      jobs.push({
        query,
        location,
        title,
        company,
        place,
        date,
        link,
        senorityLevel,
        function: jobFunction,
        employmentType,
        industries
      });
    }
  );

  scraper.on(events.custom.error, err => {
    console.error(err);
  });

  // Persist a snapshot of everything collected so far whenever the scraper
  // signals the end of a run.
  // NOTE(review): with several run() calls in flight this event presumably
  // fires more than once - confirm against the library. The write is
  // synchronous so that repeated events can never overlap on the same file;
  // the last snapshot written contains every job.
  scraper.on(events.custom.end, () => {
    console.log("All done!");
    const fs = require("fs");
    // Serialise in one step so every value is escaped correctly.
    const payload = JSON.stringify({
      time: new Date().toLocaleString(),
      data: jobs
    });
    try {
      fs.writeFileSync(OUTPUT_PATH, payload, "utf8");
    } catch (err) {
      // Do not let a failed write go unnoticed (e.g. missing "src" directory).
      console.error(`Failed to write ${OUTPUT_PATH}:`, err);
    }
  });

  // Listen for puppeteer specific browser events (intentionally no-ops).
  scraper.on(events.puppeteer.browser.targetcreated, () => {});
  scraper.on(events.puppeteer.browser.targetchanged, () => {});
  scraper.on(events.puppeteer.browser.targetdestroyed, () => {});
  scraper.on(events.puppeteer.browser.disconnected, () => {});

  // [query, location] argument pairs, passed to scraper.run() unchanged.
  // The first entry uses the array form and the rest use plain strings,
  // exactly as before.
  const searches = [
    [["software intern"], ["Canada"]],
    ["software co-op", "Canada"],
    ["software developer co-op", "Canada"],
    ["software developer intern", "Canada"],
    ["software engineer intern", "Canada"],
    ["software engineer co-op", "Canada"]
  ];

  try {
    // Run queries concurrently; each call gets its own options object.
    await Promise.all(
      searches.map(([query, location]) =>
        scraper.run(query, location, {
          paginationMax: 2,
          optimize: true
        })
      )
    );
  } finally {
    // Close the browser even if one of the searches rejected.
    await scraper.close();
  }
})().catch(err => {
  // Surface unexpected failures instead of leaving a floating rejection.
  console.error(err);
  process.exitCode = 1;
});