-
Notifications
You must be signed in to change notification settings - Fork 0
/
PuppeteerScraper.js
50 lines (38 loc) · 1.26 KB
/
PuppeteerScraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
const puppeteer = require('puppeteer');
const randomUA = require('modern-random-ua');
module.exports = class PuppeteerScraper {
constructor(configPath= "../config/scrapingConfig.json") {
this.browser = null;
this.page = null;
this.url = "http://rivanimal.org/";
require('dotenv').config();
}
async initializePuppeteer() {
this.browser = await puppeteer.launch({
userAgent: randomUA.generate(),
headless: false,
args: ['--no-sandbox']
});
this.page = await this.browser.newPage();
}
async startScraper() {
await this.initializePuppeteer();
console.log("---");
console.log("extracting content of ");
console.log(this.url);
await this.page.goto(this.url, {waitUntil: 'load', timeout: 0});
await this.extractUrls();
}
async extractUrls(){
//extract all divs with animal data
const divs = await this.page.$$('div.card-footer');
for (const div of divs) {
this.extractUrlFromDiv(div);
}
}
async extractUrlFromDiv(div){
const a = await div.$('a');
const href = await (await a.getProperty('href')).jsonValue();
console.log(href);
}
}