Skip to content

Commit 7faa894

Browse files
authored
Implement retry (#13)
* Implement retry * Update supported python versions * Drop support for Python3.6 * Lint max col * Linter compliant * Update version and README
1 parent 1584346 commit 7faa894

File tree

11 files changed

+125
-62
lines changed

11 files changed

+125
-62
lines changed

.github/workflows/main.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
lint:
77
runs-on: ubuntu-latest
88
steps:
9-
- uses: actions/checkout@v2
9+
- uses: actions/checkout@v3
1010
- name: Install dependencies
1111
run: |
1212
make deps
@@ -17,11 +17,11 @@ jobs:
1717
runs-on: ubuntu-latest
1818
strategy:
1919
matrix:
20-
python-version: ['3.6', '3.7', '3.8']
20+
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
2121
steps:
22-
- uses: actions/checkout@v2
22+
- uses: actions/checkout@v3
2323
- name: Set up Python
24-
uses: actions/setup-python@v2
24+
uses: actions/setup-python@v4
2525
with:
2626
python-version: ${{ matrix.python-version }}
2727
- name: Install dependencies

README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,22 @@ Here a little exemple on how to retrieve and store a screenshot from the Scrapin
122122
## Using ScrapingBee with Scrapy
123123

124124
Scrapy is the most popular Python web scraping framework. You can easily [integrate ScrapingBee's API with the Scrapy middleware](https://github.com/ScrapingBee/scrapy-scrapingbee).
125+
126+
127+
## Retries
128+
129+
The client includes a retry mechanism for 5XX responses.
130+
131+
```python
132+
>>> from scrapingbee import ScrapingBeeClient
133+
134+
>>> client = ScrapingBeeClient(api_key='REPLACE-WITH-YOUR-API-KEY')
135+
136+
>>> response = client.get(
137+
'https://www.scrapingbee.com/blog/',
138+
params={
139+
'render_js': True,
140+
},
141+
retries=5
142+
)
143+
```

requirements.txt

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,24 @@
1+
attrs==22.2.0
2+
certifi==2022.12.7
3+
charset-normalizer==3.1.0
4+
distlib==0.3.6
5+
filelock==3.10.0
16
flake8==3.9.2
2-
pytest==5.4.2
7+
idna==3.4
8+
iniconfig==2.0.0
9+
mccabe==0.6.1
10+
more-itertools==9.1.0
11+
packaging==23.0
12+
platformdirs==3.1.1
13+
pluggy==0.13.1
14+
py==1.11.0
15+
pycodestyle==2.7.0
16+
pyflakes==2.3.1
17+
pytest==7.2.2
18+
requests==2.28.2
19+
six==1.16.0
20+
toml==0.10.2
321
tox==3.23.1
4-
wheel==0.36.2
22+
urllib3==1.26.15
23+
virtualenv==20.21.0
24+
wcwidth==0.2.6

scrapingbee/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.1.8'
1+
__version__ = '1.2.0'

scrapingbee/client.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
from requests import request, Response
1+
from typing import Optional
2+
3+
from requests import Response, Session
4+
from requests.adapters import HTTPAdapter
5+
from urllib3.util import Retry
26

3-
from .default_headers import default_headers
47
from .utils import get_scrapingbee_url, process_headers
58

69

@@ -14,23 +17,21 @@ def request(
1417
self,
1518
method: str,
1619
url: str,
17-
params: dict = None,
18-
data: dict = None,
19-
json: dict = None,
20-
headers: dict = None,
21-
cookies: dict = None,
20+
params: Optional[dict] = None,
21+
data: Optional[dict] = None,
22+
json: Optional[dict] = None,
23+
headers: Optional[dict] = None,
24+
cookies: Optional[dict] = None,
25+
retries: Optional[int] = None,
2226
**kwargs
2327
) -> Response:
2428
if not params:
2529
params = {}
2630

2731
# Process headers and set forward_headers
2832
if headers:
29-
headers = process_headers(headers)
3033
params["forward_headers"] = True
31-
else:
32-
headers = {}
33-
headers.update(default_headers)
34+
headers = process_headers(headers)
3435

3536
# Add cookies to params
3637
if cookies:
@@ -40,12 +41,27 @@ def request(
4041
# Get ScrapingBee API URL
4142
spb_url = get_scrapingbee_url(self.api_url, self.api_key, url, params)
4243

44+
session = Session()
45+
if retries:
46+
# Retries if it is a network error or a 5xx error on an idempotent request (GET)
47+
retries = Retry(total=retries, raise_on_status=False, status_forcelist=frozenset(range(500, 600)))
48+
session.mount('https://', HTTPAdapter(max_retries=retries))
49+
session.mount('http://', HTTPAdapter(max_retries=retries))
50+
4351
if not data and json is not None:
44-
return request(method, spb_url, json=json, headers=headers, **kwargs)
45-
return request(method, spb_url, data=data, headers=headers, **kwargs)
52+
return session.request(method, spb_url, json=json, headers=headers, **kwargs)
53+
return session.request(method, spb_url, data=data, headers=headers, **kwargs)
4654

47-
def get(self, url: str, params: dict = None, headers: dict = None, cookies: dict = None, **kwargs) -> Response:
48-
return self.request("GET", url, params=params, headers=headers, cookies=cookies, **kwargs)
55+
def get(
56+
self,
57+
url: str,
58+
params: dict = None,
59+
headers: dict = None,
60+
cookies: dict = None,
61+
retries: Optional[int] = None,
62+
**kwargs
63+
) -> Response:
64+
return self.request("GET", url, params=params, headers=headers, cookies=cookies, retries=retries, **kwargs)
4965

5066
def post(
5167
self,

scrapingbee/default_headers.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

scrapingbee/utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import base64
22
import json
33
import urllib
4+
from typing import Optional
5+
6+
from .__version__ import __version__
7+
8+
DEFAULT_HEADERS = {"User-Agent": f"ScrapingBee-Python/{__version__}"}
49

510

611
def process_url(url: str) -> str:
@@ -11,8 +16,10 @@ def process_js_snippet(js_snippet: str) -> str:
1116
return base64.b64encode(js_snippet.encode()).decode()
1217

1318

14-
def process_headers(headers: dict, prefix: str = 'Spb-') -> dict:
15-
return {f'{prefix}{k}': v for k, v in headers.items()}
19+
def process_headers(headers: Optional[dict], prefix: str = 'Spb-') -> dict:
20+
headers = headers or {}
21+
headers = {f'{prefix}{k}': v for k, v in headers.items()}
22+
return {**DEFAULT_HEADERS, **headers}
1623

1724

1825
def process_cookies(cookies: dict) -> str:

setup.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
'License :: OSI Approved :: MIT License',
2525
'Programming Language :: Python',
2626
'Programming Language :: Python :: 3',
27-
'Programming Language :: Python :: 3.6',
2827
'Programming Language :: Python :: 3.7',
2928
'Programming Language :: Python :: 3.8',
29+
'Programming Language :: Python :: 3.9',
30+
'Programming Language :: Python :: 3.10',
31+
'Programming Language :: Python :: 3.11',
3032
'Topic :: Software Development :: Libraries :: Python Modules',
3133
],
32-
python_requires='>=3.6',
34+
python_requires='>=3.7',
3335
install_requires=['requests'],
3436
)

tests/test_client.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,76 +3,76 @@
33
import pytest
44

55
from scrapingbee import ScrapingBeeClient
6-
from scrapingbee.default_headers import default_headers
6+
from scrapingbee.utils import DEFAULT_HEADERS
77

88

99
@pytest.fixture(scope='module')
1010
def client():
1111
return ScrapingBeeClient(api_key='API_KEY')
1212

1313

14-
@mock.patch('scrapingbee.client.request')
15-
def test_get(mock_request, client):
14+
@mock.patch('scrapingbee.client.Session')
15+
def test_get(mock_session, client):
1616
'''It should make a GET request with the url and API key'''
1717
client.get('https://httpbin.org')
1818

19-
mock_request.assert_called_with(
19+
mock_session.return_value.request.assert_called_with(
2020
'GET',
2121
'https://app.scrapingbee.com/api/v1/'
2222
'?api_key=API_KEY&url=https%3A//httpbin.org',
2323
data=None,
24-
headers=default_headers
24+
headers=DEFAULT_HEADERS
2525
)
2626

2727

28-
@mock.patch('scrapingbee.client.request')
29-
def test_get_with_params(mock_request, client):
28+
@mock.patch('scrapingbee.client.Session')
29+
def test_get_with_params(mock_session, client):
3030
'''It should add parameters to the url'''
3131
client.get('https://httpbin.org', params={'render_js': True})
3232

33-
mock_request.assert_called_with(
33+
mock_session.return_value.request.assert_called_with(
3434
'GET',
3535
'https://app.scrapingbee.com/api/v1/'
3636
'?api_key=API_KEY&url=https%3A//httpbin.org&render_js=True',
3737
data=None,
38-
headers=default_headers,
38+
headers=DEFAULT_HEADERS,
3939
)
4040

4141

42-
@mock.patch('scrapingbee.client.request')
43-
def test_get_with_headers(mock_request, client):
42+
@mock.patch('scrapingbee.client.Session')
43+
def test_get_with_headers(mock_session, client):
4444
'''It should prefix header names with Spb- and set forward_headers'''
4545
client.get('https://httpbin.org', headers={'Content-Type': 'text/html; charset=utf-8'})
4646

47-
mock_request.assert_called_with(
47+
mock_session.return_value.request.assert_called_with(
4848
'GET',
4949
'https://app.scrapingbee.com/api/v1/'
5050
'?api_key=API_KEY&url=https%3A//httpbin.org&forward_headers=True',
5151
data=None,
5252
headers={'Spb-Content-Type': 'text/html; charset=utf-8',
53-
**default_headers},
53+
**DEFAULT_HEADERS},
5454
)
5555

5656

57-
@mock.patch('scrapingbee.client.request')
58-
def test_get_with_cookies(mock_request, client):
57+
@mock.patch('scrapingbee.client.Session')
58+
def test_get_with_cookies(mock_session, client):
5959
'''It should format the cookies and add them to the url'''
6060
client.get('https://httpbin.org', cookies={
6161
'name_1': 'value_1',
6262
'name_2': 'value_2',
6363
})
6464

65-
mock_request.assert_called_with(
65+
mock_session.return_value.request.assert_called_with(
6666
'GET',
6767
'https://app.scrapingbee.com/api/v1/'
6868
'?api_key=API_KEY&url=https%3A//httpbin.org&cookies=name_1=value_1;name_2=value_2',
6969
data=None,
70-
headers=default_headers,
70+
headers=DEFAULT_HEADERS,
7171
)
7272

7373

74-
@mock.patch('scrapingbee.client.request')
75-
def test_get_with_extract_rules(mock_request, client):
74+
@mock.patch('scrapingbee.client.Session')
75+
def test_get_with_extract_rules(mock_session, client):
7676
'''It should format the extract_rules and add them to the url'''
7777
client.get('https://httpbin.org', params={
7878
'extract_rules': {
@@ -81,19 +81,19 @@ def test_get_with_extract_rules(mock_request, client):
8181
}
8282
})
8383

84-
mock_request.assert_called_with(
84+
mock_session.return_value.request.assert_called_with(
8585
'GET',
8686
'https://app.scrapingbee.com/api/v1/'
8787
'?api_key=API_KEY&url=https%3A//httpbin.org&'
8888
'extract_rules=%7B%22title%22%3A%20%22h1%22%2C%20%22'
8989
'subtitle%22%3A%20%22%23subtitle%22%7D',
9090
data=None,
91-
headers=default_headers,
91+
headers=DEFAULT_HEADERS,
9292
)
9393

9494

95-
@mock.patch('scrapingbee.client.request')
96-
def test_get_with_js_scenario(mock_request, client):
95+
@mock.patch('scrapingbee.client.Session')
96+
def test_get_with_js_scenario(mock_session, client):
9797
'''It should format the extract_rules and add them to the url'''
9898
client.get('https://httpbin.org', params={
9999
'js_scenario': {
@@ -103,24 +103,24 @@ def test_get_with_js_scenario(mock_request, client):
103103
}
104104
})
105105

106-
mock_request.assert_called_with(
106+
mock_session.return_value.request.assert_called_with(
107107
'GET',
108108
'https://app.scrapingbee.com/api/v1/'
109109
'?api_key=API_KEY&url=https%3A//httpbin.org&'
110110
'js_scenario=%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D',
111111
data=None,
112-
headers=default_headers,
112+
headers=DEFAULT_HEADERS,
113113
)
114114

115115

116-
@mock.patch('scrapingbee.client.request')
117-
def test_post(mock_request, client):
116+
@mock.patch('scrapingbee.client.Session')
117+
def test_post(mock_session, client):
118118
'''It should make a POST request with some data'''
119119
client.post('https://httpbin.org', data={'KEY_1': 'VALUE_1'})
120120

121-
mock_request.assert_called_with(
121+
mock_session.return_value.request.assert_called_with(
122122
'POST',
123123
'https://app.scrapingbee.com/api/v1/?api_key=API_KEY&url=https%3A//httpbin.org',
124124
data={'KEY_1': 'VALUE_1'},
125-
headers=default_headers
125+
headers=DEFAULT_HEADERS
126126
)

tests/test_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,17 @@ def test_process_js_snippet():
2626
def test_process_headers():
2727
'''It should add a Spb- prefix to header names'''
2828
output = process_headers({'Accept-Language': 'En-US'})
29-
assert output == {'Spb-Accept-Language': 'En-US'}
29+
assert output == {
30+
'User-Agent': 'ScrapingBee-Python/1.2.0',
31+
'Spb-Accept-Language': 'En-US',
32+
}
3033

3134

3235
def test_process_cookies():
3336
'''It should format cookies to a string'''
3437
output = process_cookies({
3538
'name_1': 'value_1',
36-
'name_2': 'value_2'
39+
'name_2': 'value_2',
3740
})
3841
assert output == 'name_1=value_1;name_2=value_2'
3942

0 commit comments

Comments
 (0)