-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchap6_Request.py
58 lines (43 loc) · 1.5 KB
/
chap6_Request.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import requests
from bs4 import BeautifulSoup
# Root URL: the remote full-stack job listings category on We Work Remotely.
url = "https://weworkremotely.com/categories/remote-full-stack-programming-jobs"
response = requests.get(url)
# Uncomment to verify the request succeeded (expect 200):
# print(response.status_code)
soup = BeautifulSoup(
    response.content,
    "html.parser",
)
# BeautifulSoup matches CSS classes via the `class_` keyword argument,
# because `class` is a reserved word in Python.
# [1:-1] drops the first and last <li>, which wrap the real job listings.
jobs = soup.find("section", class_="jobs").find_all("li")[1:-1]
# Accumulates one dictionary of job details per listing.
all_jobs = []
# find_all() returns a list, so `jobs` is iterated listing by listing.
for job in jobs:
    title = job.find("span", class_="title").text
    # Three spans share the 'company' class: company name, contract type, region.
    company, contract_type, region = job.find_all("span", class_="company")
    # Extract the text content from each <span>.
    company = company.text
    contract_type = contract_type.text
    region = region.text
    # The job link follows the flag-logo div. find_next_sibling() skips
    # whitespace text nodes — plain .next_sibling can return a bare string,
    # which would make the ['href'] lookup below raise TypeError.
    link = job.find("div", class_="tooltip--flag-logo").find_next_sibling()
    # Build an absolute URL only when the link exists and carries an href;
    # fall back to None instead of crashing on string concatenation.
    job_url = (
        f"https://weworkremotely.com{link['href']}"
        if link is not None and link.has_attr("href")
        else None
    )
    job_data = {
        "title": title,
        "company": company,
        "Contract_type": contract_type,
        "region": region,
        "url": job_url,
    }
    # Add this job's information to the collected results.
    all_jobs.append(job_data)
# Print every scraped job for a quick visual check.
for job_info in all_jobs:
    print(job_info)