-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.js
159 lines (130 loc) · 4.59 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
const fs = require('fs')
const https = require('https')
const cheerio = require('cheerio');
const puppeteer = require('puppeteer')
const redis = require('redis')
const client = redis.createClient()
/**
 * Scrapes the coin table out of an already-loaded page and stores each row as
 * a JSON string in the Redis hash `CoinMarketCap`, keyed by the coin symbol.
 *
 * Values are assigned to the names in `coinInfo` strictly in order. Cells 2, 7
 * and 8 always contribute a fixed number of fields; any other cell contributes
 * its text only when that text is non-empty (cell 10 falls back to the `src`
 * of an embedded image when it has no text).
 *
 * @param {object} page - Page handle exposing an async `content()` that
 *   resolves to the page HTML (startParse passes a Puppeteer page).
 * @returns {Promise<void>} Resolves once every row has been written to Redis;
 *   rejects if reading the page or any of the writes fails.
 */
const Parse = (async (page) => {
  /// UNCOMMENT IF NEEDED
  /// PAGE SCROLLER
  // await page.evaluate(async () => {
  //   await new Promise((resolve) => {
  //     let totalHeight = 0;
  //     const distance = 1000; // Adjust scroll distance if needed
  //     const timer = setInterval(() => {
  //       const scrollHeight = document.body.scrollHeight;
  //       window.scrollBy(0, distance);
  //       totalHeight += distance;
  //       if (totalHeight >= scrollHeight) {
  //         clearInterval(timer);
  //         resolve();
  //       }
  //     }, 1); // Adjust scroll speed as needed
  //   });
  // });

  // Field names, in the order the table cells produce them.
  // NOTE(review): 'makertCap' looks like a typo of 'marketCap'. It is kept
  // as-is because it is the key written into the stored JSON; renaming it
  // would change the data format for whoever reads the hash.
  const coinInfo = [
    'id',
    'icon',
    'name',
    'symbol',
    'price',
    '1h',
    '24h',
    '7d',
    'shortMarketCap',
    'makertCap',
    'Volume24h',
    'VolumeInCoin',
    'Circulating Supply',
    '7dCandle',
  ];

  const $ = cheerio.load(await page.content());
  const selector = '#__next > div > div.main-content > div.cmc-body-wrapper > div > div:nth-child(1) > div.sc-beb003d5-2.bkNrIb > table > tbody > tr';

  // Pending Redis writes. They are collected and awaited below so that Parse
  // only resolves after the data is stored, and a failed write rejects Parse
  // instead of becoming an unhandled rejection inside an `.each` callback.
  const writes = [];

  $(selector).each((rowIndex, rowElem) => {
    const coin = {};
    let fieldIndex = 0;
    // Stores `value` under the next unused field name.
    const addField = (value) => {
      coin[coinInfo[fieldIndex]] = value;
      fieldIndex += 1;
    };

    $(rowElem).children().each((index, elem) => {
      const text = $(elem).text();
      if (index === 2) {
        // One cell carries three fields: icon URL, first <p> text, symbol.
        const cellHtml = $(elem).html();
        addField($('div > a > div', cellHtml).find('img').attr('src'));
        addField($('p:first-child', cellHtml).text());
        addField($('.coin-item-symbol', cellHtml).text());
      } else if (index === 7) {
        // Two <span> values share this cell.
        const cellHtml = $(elem).html();
        addField($('span:first-child', cellHtml).text());
        addField($('span:eq(1)', cellHtml).text());
      } else if (index === 8) {
        // Two <p> values share this cell.
        const cellHtml = $(elem).html();
        addField($('p:eq(0)', cellHtml).text());
        addField($('p:eq(1)', cellHtml).text());
      } else if (text) {
        addField(text);
      } else if (index === 10) {
        // Text-less cell: keep the source of its image instead.
        addField($(elem).find('img').attr('src'));
      }
    });

    // A row without a symbol has no usable hash field (it would be written
    // under an empty/undefined key), so it is skipped.
    if (!coin.symbol) {
      return;
    }

    /// UNCOMMENT IF NEEDED: save the coin icon to disk
    // saveIcon(coin.icon, `./coin_icon/icon_${coin.symbol}.png`);

    writes.push(client.hSet('CoinMarketCap', coin.symbol, JSON.stringify(coin)));
  });

  await Promise.all(writes);
})
/**
 * Downloads an image over HTTPS and writes it to `savePath`.
 *
 * Fire-and-forget: the function returns immediately and reports the outcome
 * on the console. Nothing is written unless the server answers with HTTP 200,
 * so error pages are not saved as if they were images.
 *
 * @param {string} imageUrl - HTTPS URL of the image; a missing/empty value is
 *   logged and ignored.
 * @param {string} savePath - Destination file path.
 * @returns {void}
 */
const saveIcon = (imageUrl, savePath) => {
  if (!imageUrl) {
    // https.get would throw synchronously on an undefined URL.
    console.error('Error downloading the image:', new Error('no image URL provided'));
    return;
  }

  https.get(imageUrl, (response) => {
    if (response.statusCode !== 200) {
      // Drain the body so the socket is released, but do not save it.
      response.resume();
      console.error(
        'Error downloading the image:',
        new Error(`unexpected HTTP status ${response.statusCode} for ${imageUrl}`),
      );
      return;
    }

    const fileStream = fs.createWriteStream(savePath);
    // Without a listener a write failure (e.g. missing directory) would be
    // an uncaught 'error' event and take the process down.
    fileStream.on('error', (error) => {
      console.error('Error saving the image:', error);
    });
    fileStream.on('finish', () => {
      fileStream.close();
      console.log('Image saved successfully.');
    });
    response.pipe(fileStream);
  }).on('error', (error) => {
    console.error('Error downloading the image:', error);
  });
};
/**
 * Crawls the CoinMarketCap listing pages and feeds each one to `Parse`.
 *
 * Pages 1..totalPages are processed by a fixed pool of workers, so at most
 * `maxConcurrentPages` browser tabs are open at any time and every page is
 * finished before the browser is closed. A page that fails is logged and the
 * crawl continues; the failure is reported after cleanup by rejecting.
 *
 * @param {object} [options]
 * @param {number} [options.totalPages=97] - Last page number to fetch.
 * @param {number} [options.maxConcurrentPages=12] - Upper bound on tabs open
 *   at once; adjust to the system's capacity.
 * @returns {Promise<void>} Resolves when every page was parsed.
 * @throws {Error} After cleanup, if one or more pages could not be parsed, or
 *   if connecting to Redis / launching the browser fails.
 */
const startParse = async ({ totalPages = 97, maxConcurrentPages = 12 } = {}) => {
  console.log('Started parsing...');
  const startTime = new Date();
  await client.connect();

  const failedPages = [];
  let browser;
  try {
    // The original passed the string "false", which is truthy and therefore
    // launched a headless browser; `true` states that effective behaviour.
    // NOTE(review): use `false` here if a visible browser was intended.
    browser = await puppeteer.launch({ headless: true });

    // Opens one tab, parses one listing page and always closes the tab.
    const scrapePage = async (pageNumber) => {
      const page = await browser.newPage();
      try {
        await page.setViewport({ width: 640, height: 8000 });
        await page.goto(`https://coinmarketcap.com/?page=${pageNumber}`);
        await Parse(page);
      } finally {
        await page.close();
      }
    };

    // Workers pull the next page number from a shared counter. The read and
    // increment happen without an intervening await, so no page is handed
    // out twice.
    let nextPage = 1;
    const worker = async () => {
      while (nextPage <= totalPages) {
        const pageNumber = nextPage;
        nextPage += 1;
        try {
          await scrapePage(pageNumber);
        } catch (error) {
          failedPages.push(pageNumber);
          console.error(`Failed to parse page ${pageNumber}:`, error);
        }
      }
    };

    const workerCount = Math.max(1, Math.min(maxConcurrentPages, totalPages));
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
  } finally {
    try {
      if (browser) {
        await browser.close();
      }
    } finally {
      // Close the Redis connection opened above; otherwise the open socket
      // keeps the process alive after the crawl is done.
      await client.quit();
    }
  }

  const endTime = new Date();
  console.log('Finished in', endTime - startTime, 'ms');

  if (failedPages.length > 0) {
    throw new Error(
      `Failed to parse ${failedPages.length} of ${totalPages} page(s): ${failedPages.join(', ')}`,
    );
  }
};
// Script entry point: run the crawl and report a failure explicitly instead of
// leaving the returned promise floating with no rejection handler.
startParse().catch((error) => {
  console.error('Parsing failed:', error);
  process.exitCode = 1;
});