# scrape_coles_soup_mix.R
# Scrapes prices from the Coles online "soup mix" search results page using a
# Selenium-driven Chrome browser that runs in a Docker container.
library(polite)
library(rvest)
library(tidyverse)
library(lubridate)
library(RSelenium)
# Start a Selenium standalone-Chrome server in a detached Docker container
# named "my_container". `-p 4445:4444` publishes the container's port 4444 on
# host port 4445, which is the port the remote driver connects to.
docker_status <- system(
  "docker run -d --name my_container -p 4445:4444 selenium/standalone-chrome:2.53.0"
)
if (docker_status != 0L) {
  # Deliberately a warning, not an error: a container with this name may
  # already be running from an earlier session, in which case the rest of the
  # script can still use it.
  warning(
    "`docker run` exited with status ", docker_status,
    "; is Docker running, or does 'my_container' already exist?",
    call. = FALSE
  )
}

# check it is running
system("docker ps")

# `docker run -d` returns as soon as the container is created, which can be
# before the Selenium server inside it answers requests. Poll the server's
# status endpoint (once per second, for up to 30 seconds) so the script does
# not try to open a browser session against a server that is still booting.
selenium_ready <- FALSE
for (attempt in seq_len(30L)) {
  status_con <- url("http://localhost:4445/wd/hub/status")
  selenium_ready <- tryCatch(
    {
      # Opening the connection performs the HTTP request; it errors while the
      # server is unreachable. The accompanying warning is expected noise.
      suppressWarnings(open(status_con, "r"))
      TRUE
    },
    error = function(e) FALSE
  )
  close(status_con)
  if (selenium_ready) {
    break
  }
  Sys.sleep(1)
}
if (!selenium_ready) {
  stop(
    "Selenium server on localhost:4445 did not respond within 30 seconds.",
    call. = FALSE
  )
}
# Describe the Selenium server to talk to: the one published on this machine
# at port 4445, driving a Chrome browser.
remDr <- RSelenium::remoteDriver(
  browserName = "chrome",
  port = 4445L,
  remoteServerAddr = "localhost"
)

# Open a browser session on that server, then query the server's status.
remDr$open()
remDr$getStatus()
# Search-results URL to scrape: the "soup mix" query, results page 1.
website <- "https://shop.coles.com.au/a/national/everything/search/soup%20mix?pageNumber=1"
remDr$navigate(website)

# getPageSource() is indexed with [[1]] to pull out the page HTML, which is
# then handed to read_html() below.
html <- remDr$getPageSource()[[1]]

# Parse the HTML a single time and reuse the parsed document for every
# selector, rather than re-parsing the same string for each query.
page_doc <- read_html(html)

# Exploratory look-ups: the numbers parsed from two candidate price
# containers. The results are not stored; they are only displayed when the
# script is run with top-level auto-printing.
page_doc %>%
  html_elements(".product-pricing-info") %>%
  html_text() %>%
  parse_number()

page_doc %>%
  html_elements("span.price-container") %>%
  html_text() %>%
  parse_number()

# Text of the elements holding the dollar part and the cent part of each
# price, kept as character vectors.
dollar <- page_doc %>%
  html_elements("span.dollar-value") %>%
  html_text()

cent <- page_doc %>%
  html_elements(".cent-value") %>%
  html_text()
# Tear down: end the browser session first, then stop the Selenium container
# and remove it so the name "my_container" is free for the next run.
remDr$close()
for (docker_cmd in c("docker stop my_container", "docker rm my_container")) {
  system(docker_cmd)
}