
Commit 0441024

Fix handling of AI extract rules (#19)
* Fix handling of AI extract rules
* Drop support for Python 3.7
1 parent f3a7669 commit 0441024

8 files changed: +40 / -14 lines changed

.github/workflows/main.yaml

Lines changed: 1 addition & 7 deletions
@@ -17,7 +17,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python
@@ -42,12 +42,6 @@ jobs:
       - name: Build package
         run: >-
           make build
-      - name: Publish package to Test PyPI (always)
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
-          repository_url: https://test.pypi.org/legacy/
-          skip_existing: true
       - name: Publish package to PyPI (only if pushing a tag)
         if: startsWith(github.ref, 'refs/tags')
         uses: pypa/gh-action-pypi-publish@release/v1

README.md

Lines changed: 2 additions & 0 deletions
@@ -40,6 +40,8 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
         'device': 'desktop',
         # Use some data extraction rules
         'extract_rules': {'title': 'h1'},
+        # Use AI to extract data from the page
+        'ai_extract_rules': {'product_name': 'The name of the product', 'price': 'The price in USD'},
         # Wrap response in JSON
         'json_response': False,
         # Interact with the webpage you want to scrape
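For context, the new parameter slots into the existing client API the same way extract_rules does. A minimal sketch of a full call, with a placeholder API key and target URL that are not taken from this commit:

from scrapingbee import ScrapingBeeClient

# Placeholder credentials and URL, for illustration only.
client = ScrapingBeeClient(api_key='YOUR_API_KEY')

response = client.get(
    'https://www.example.com/product',
    params={
        # Each key is a field to extract; each value describes it in plain English.
        'ai_extract_rules': {
            'product_name': 'The name of the product',
            'price': 'The price in USD',
        },
    },
)
print(response.content)

The rules are passed as a plain dict; the client JSON-stringifies them before building the request URL (see scrapingbee/utils.py below).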

requirements.txt

Lines changed: 4 additions & 4 deletions
@@ -3,17 +3,17 @@ certifi==2022.12.7
 charset-normalizer==3.1.0
 distlib==0.3.6
 filelock==3.10.0
-flake8==3.9.2
+flake8==6.0.0
 idna==3.4
 iniconfig==2.0.0
-mccabe==0.6.1
+mccabe==0.7.0
 more-itertools==9.1.0
 packaging==23.0
 platformdirs==3.1.1
 pluggy==0.13.1
 py==1.11.0
-pycodestyle==2.7.0
-pyflakes==2.3.1
+pycodestyle==2.10.0
+pyflakes==3.0.1
 pytest==7.2.2
 requests==2.28.2
 six==1.16.0

scrapingbee/__version__.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-__version__ = "2.0.1"
+__version__ = "2.0.2"

scrapingbee/utils.py

Lines changed: 2 additions & 0 deletions
@@ -46,6 +46,8 @@ def process_params(params: dict) -> dict:
             new_params[k] = process_cookies(v)
         elif k == 'extract_rules':
             new_params[k] = process_json_stringify_param(v, 'extract_rules')
+        elif k == 'ai_extract_rules':
+            new_params[k] = process_json_stringify_param(v, 'ai_extract_rules')
         elif k == 'js_scenario':
             new_params[k] = process_json_stringify_param(v, 'js_scenario')
         else:
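The new branch reuses the existing process_json_stringify_param helper, whose body is not part of this diff. As a rough sketch of what it is assumed to do (an illustration, not the library's actual implementation):

import json

def process_json_stringify_param(value, name):
    # Sketch: dict/list rules are serialized to a JSON string so they can be
    # sent as a single query-string parameter; strings pass through unchanged.
    # `name` is the parameter name; the real helper presumably uses it for
    # validation or error messages, so it is unused in this sketch.
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return value

The stringified output expected in tests/test_utils.py matches json.dumps with its default separators, which is why the sketch relies on it.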

setup.py

Lines changed: 1 addition & 1 deletion
@@ -31,6 +31,6 @@
         'Programming Language :: Python :: 3.11',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
-    python_requires='>=3.7',
+    python_requires='>=3.8',
     install_requires=['requests'],
 )

tests/test_client.py

Lines changed: 21 additions & 0 deletions
@@ -113,6 +113,27 @@ def test_get_with_js_scenario(mock_session, client):
     )


+@mock.patch('scrapingbee.client.Session')
+def test_get_with_ai_extract_rules(mock_session, client):
+    '''It should format the ai_extract_rules and add them to the url'''
+    client.get('https://httpbin.org', params={
+        'ai_extract_rules': {
+            "product_name": "The name of the product",
+            "price": "The price in USD"
+        }
+    })
+
+    mock_session.return_value.request.assert_called_with(
+        'GET',
+        'https://app.scrapingbee.com/api/v1/'
+        '?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
+        'ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22'
+        'price%22%3A+%22The+price+in+USD%22%7D',
+        data=None,
+        headers=DEFAULT_HEADERS,
+    )
+
+
 @mock.patch('scrapingbee.client.Session')
 def test_post(mock_session, client):
     '''It should make a POST request with some data'''
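The long percent-encoded blob in the assertion above is simply the stringified rules run through standard URL encoding. Whether the client uses urllib internally is not shown here, but the expected value can be reproduced with the standard library:

from urllib.parse import urlencode

rules = '{"product_name": "The name of the product", "price": "The price in USD"}'
print(urlencode({'ai_extract_rules': rules}))
# ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22price%22%3A+%22The+price+in+USD%22%7D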

tests/test_utils.py

Lines changed: 8 additions & 1 deletion
@@ -18,7 +18,7 @@ def test_process_headers():
     """It should add a Spb- prefix to header names"""
     output = process_headers({"Accept-Language": "En-US"})
     assert output == {
-        "User-Agent": "ScrapingBee-Python/2.0.1",
+        "User-Agent": "ScrapingBee-Python/2.0.2",
         "Spb-Accept-Language": "En-US",
     }

@@ -46,6 +46,13 @@ def test_process_js_scenario():
     assert output == '{"instructions": [{"click": "#buttonId"}]}'


+def test_process_ai_extract_rules():
+    """It should format ai_extract_rules to a stringified JSON"""
+    output = process_json_stringify_param(
+        {"product_name": "The name of the product", "price": "The price in USD"}, "ai_extract_rules")
+    assert output == '{"product_name": "The name of the product", "price": "The price in USD"}'
+
+
 def test_process_params():
     """It should keep boolean parameters"""
     output = process_params({"render_js": True})
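Putting the two pieces together, passing ai_extract_rules as a dict through process_params should yield the stringified form asserted above. A hypothetical quick check (assuming the scrapingbee.utils import path used by these tests, and that process_params returns only the processed keys, which this diff does not show):

from scrapingbee.utils import process_params

# Hypothetical check: the dict should come out as a JSON string,
# mirroring the existing extract_rules and js_scenario behaviour.
params = process_params({'ai_extract_rules': {'price': 'The price in USD'}})
print(params)
# Expected, per the new branch in utils.py: {'ai_extract_rules': '{"price": "The price in USD"}'}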

0 commit comments
